[llvm] 50578cf - [AMDGPU] Add f16 to i1 CodeGen patterns.

Wen-Heng Chung via llvm-commits llvm-commits at lists.llvm.org
Thu Feb 4 09:44:27 PST 2021


Author: Wen-Heng (Jack) Chung
Date: 2021-02-04T11:44:18-06:00
New Revision: 50578cf3392234016410077593aec397f436258d

URL: https://github.com/llvm/llvm-project/commit/50578cf3392234016410077593aec397f436258d
DIFF: https://github.com/llvm/llvm-project/commit/50578cf3392234016410077593aec397f436258d.diff

LOG: [AMDGPU] Add f16 to i1 CodeGen patterns.

Follow patterns used for f32 and f64 types.

Differential Revision: https://reviews.llvm.org/D95964

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/SIInstructions.td
    llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptosi.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptosi.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptoui.mir
    llvm/test/CodeGen/AMDGPU/fptosi.f16.ll
    llvm/test/CodeGen/AMDGPU/fptoui.f16.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index ecb875debefd..81cc2a1c15c7 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -1795,6 +1795,8 @@ class FPToI1Pat<Instruction Inst, int KOne, ValueType kone_type, ValueType vt, S
   (i1 (Inst 0, (kone_type KOne), $src0_modifiers, $src0, DSTCLAMP.NONE))
 >;
 
+def : FPToI1Pat<V_CMP_EQ_F16_e64, CONST.FP16_ONE, i16, f16, fp_to_uint>; // f16 -> i1 (unsigned): equality compare of the source against the FP16_ONE constant.
+def : FPToI1Pat<V_CMP_EQ_F16_e64, CONST.FP16_NEG_ONE, i16, f16, fp_to_sint>; // f16 -> i1 (signed): equality compare of the source against the FP16_NEG_ONE constant.
 def : FPToI1Pat<V_CMP_EQ_F32_e64, CONST.FP32_ONE, i32, f32, fp_to_uint>;
 def : FPToI1Pat<V_CMP_EQ_F32_e64, CONST.FP32_NEG_ONE, i32, f32, fp_to_sint>;
 def : FPToI1Pat<V_CMP_EQ_F64_e64, CONST.FP64_ONE, i64, f64, fp_to_uint>;

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptosi.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptosi.mir
index f8171351781a..648efefd4d36 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptosi.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptosi.mir
@@ -130,3 +130,75 @@ body: |
     %3:vgpr(s32) = G_FPTOSI %2
     $vgpr0 = COPY %3
 ...
+
+---
+name: fptosi_s16_to_s1_vv
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+# f16 source in a VGPR: G_FPTOSI to s32 followed by G_TRUNC to s1. The checks below record a V_CVT_F32_F16 + V_CVT_I32_F32 sequence.
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; GCN-LABEL: name: fptosi_s16_to_s1_vv
+    ; GCN: liveins: $vgpr0
+    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec
+    ; GCN: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec
+    ; GCN: S_ENDPGM 0, implicit %2
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s16) = G_TRUNC %0
+    %2:vgpr(s32) = G_FPTOSI %1
+    %3:vgpr(s1)  = G_TRUNC %2
+    S_ENDPGM 0, implicit %3
+...
+
+---
+name: fptosi_s16_to_s1_vs
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+# Same conversion as the _vv variant, but the f16 source lives in an SGPR while the G_FPTOSI result is a VGPR.
+body: |
+  bb.0:
+    liveins: $sgpr0
+
+    ; GCN-LABEL: name: fptosi_s16_to_s1_vs
+    ; GCN: liveins: $sgpr0
+    ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GCN: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec
+    ; GCN: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec
+    ; GCN: S_ENDPGM 0, implicit %2
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:sgpr(s16) = G_TRUNC %0
+    %2:vgpr(s32) = G_FPTOSI %1
+    %3:vgpr(s1)  = G_TRUNC %2
+    S_ENDPGM 0, implicit %3
+...
+
+---
+name: fptosi_s16_to_s1_fneg_vv
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+# G_FNEG feeds the conversion. The checks record the negation as a V_XOR_B32 with 32768 (0x8000, the f16 sign bit) ahead of the convert sequence.
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; GCN-LABEL: name: fptosi_s16_to_s1_fneg_vv
+    ; GCN: liveins: $vgpr0
+    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
+    ; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec
+    ; GCN: %5:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[V_XOR_B32_e32_]], implicit $mode, implicit $exec
+    ; GCN: %3:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %5, implicit $mode, implicit $exec
+    ; GCN: S_ENDPGM 0, implicit %3
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s16) = G_TRUNC %0
+    %2:vgpr(s16) = G_FNEG %1
+    %3:vgpr(s32) = G_FPTOSI %2
+    %4:vgpr(s1)  = G_TRUNC %3
+    S_ENDPGM 0, implicit %4
+...

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir
index e6dd3fbb04a4..85592b46b4f3 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir
@@ -103,3 +103,75 @@ body: |
     %3:vgpr(s32) = G_FPTOUI %2
     $vgpr0 = COPY %3
 ...
+
+---
+name: fptoui_s16_to_s1_vv
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+# f16 source in a VGPR: G_FPTOUI to s32 followed by G_TRUNC to s1. The checks below record a V_CVT_F32_F16 + V_CVT_U32_F32 sequence.
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; GCN-LABEL: name: fptoui_s16_to_s1_vv
+    ; GCN: liveins: $vgpr0
+    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec
+    ; GCN: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %4, implicit $mode, implicit $exec
+    ; GCN: S_ENDPGM 0, implicit %2
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s16) = G_TRUNC %0
+    %2:vgpr(s32) = G_FPTOUI %1
+    %3:vgpr(s1)  = G_TRUNC %2
+    S_ENDPGM 0, implicit %3
+...
+
+---
+name: fptoui_s16_to_s1_vs
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+# Same conversion as the _vv variant, but the f16 source lives in an SGPR while the G_FPTOUI result is a VGPR.
+body: |
+  bb.0:
+    liveins: $sgpr0
+
+    ; GCN-LABEL: name: fptoui_s16_to_s1_vs
+    ; GCN: liveins: $sgpr0
+    ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GCN: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec
+    ; GCN: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %4, implicit $mode, implicit $exec
+    ; GCN: S_ENDPGM 0, implicit %2
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:sgpr(s16) = G_TRUNC %0
+    %2:vgpr(s32) = G_FPTOUI %1
+    %3:vgpr(s1)  = G_TRUNC %2
+    S_ENDPGM 0, implicit %3
+...
+
+---
+name: fptoui_s16_to_s1_fneg_vv
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+# G_FNEG feeds the conversion. The checks record the negation as a V_XOR_B32 with 32768 (0x8000, the f16 sign bit) ahead of the convert sequence.
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; GCN-LABEL: name: fptoui_s16_to_s1_fneg_vv
+    ; GCN: liveins: $vgpr0
+    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
+    ; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec
+    ; GCN: %5:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[V_XOR_B32_e32_]], implicit $mode, implicit $exec
+    ; GCN: %3:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %5, implicit $mode, implicit $exec
+    ; GCN: S_ENDPGM 0, implicit %3
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s16) = G_TRUNC %0
+    %2:vgpr(s16) = G_FNEG %1
+    %3:vgpr(s32) = G_FPTOUI %2
+    %4:vgpr(s1)  = G_TRUNC %3
+    S_ENDPGM 0, implicit %4
+...

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptosi.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptosi.mir
index 74f6f01a4548..3ad600c8c31c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptosi.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptosi.mir
@@ -644,3 +644,27 @@ body: |
     %1:_(<2 x s64>) = G_FPTOSI %0
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
 ...
+
+---
+name: test_fptosi_s16_to_s1 # Legalizes G_FPTOSI s16 -> s1: checks expect an s32 G_FPTOSI plus G_TRUNC to s1; the SI checks additionally G_FPEXT the source to s32.
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; SI-LABEL: name: test_fptosi_s16_to_s1
+    ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
+    ; SI: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[FPEXT]](s32)
+    ; SI: [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[FPTOSI]](s32)
+    ; SI: S_ENDPGM 0, implicit [[TRUNC1]](s1)
+    ; VI-LABEL: name: test_fptosi_s16_to_s1
+    ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; VI: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[TRUNC]](s16)
+    ; VI: [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[FPTOSI]](s32)
+    ; VI: S_ENDPGM 0, implicit [[TRUNC1]](s1)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s16) = G_TRUNC %0
+    %2:_(s1)  = G_FPTOSI %1
+    S_ENDPGM 0, implicit %2
+...

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptoui.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptoui.mir
index 87482c0b48d8..f731bbf4f9cc 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptoui.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptoui.mir
@@ -814,3 +814,27 @@ body: |
     %1:_(<2 x s64>) = G_FPTOUI %0
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
 ...
+
+---
+name: test_fptoui_s16_to_s1 # Legalizes G_FPTOUI s16 -> s1 (the unsigned conversion this file covers): s32 G_FPTOUI plus G_TRUNC to s1.
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; SI-LABEL: name: test_fptoui_s16_to_s1
+    ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
+    ; SI: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FPEXT]](s32)
+    ; SI: [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[FPTOUI]](s32)
+    ; SI: S_ENDPGM 0, implicit [[TRUNC1]](s1)
+    ; VI-LABEL: name: test_fptoui_s16_to_s1
+    ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; VI: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[TRUNC]](s16)
+    ; VI: [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[FPTOUI]](s32)
+    ; VI: S_ENDPGM 0, implicit [[TRUNC1]](s1)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s16) = G_TRUNC %0
+    %2:_(s1)  = G_FPTOUI %1
+    S_ENDPGM 0, implicit %2
+...

diff  --git a/llvm/test/CodeGen/AMDGPU/fptosi.f16.ll b/llvm/test/CodeGen/AMDGPU/fptosi.f16.ll
index 7fa4f3c32ba8..343444c3cc85 100644
--- a/llvm/test/CodeGen/AMDGPU/fptosi.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fptosi.f16.ll
@@ -132,3 +132,16 @@ entry:
   store <2 x i64> %r.val, <2 x i64> addrspace(1)* %r
   ret void
 }
+
+; GCN-LABEL: {{^}}fptosi_f16_to_i1:
+; SI: v_cvt_f32_f16_e32 v{{[0-9]+}}, s{{[0-9]+}}
+; SI: v_cmp_eq_f32_e32 vcc, -1.0, v{{[0-9]+}}
+; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
+; VI: v_cmp_eq_f16_e64 [[CC:s\[[0-9]+:[0-9]+\]]], 0xbc00, s{{[0-9]+}}
+; VI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, [[CC]]
+define amdgpu_kernel void @fptosi_f16_to_i1(i1 addrspace(1)* %out, half %in) {
+entry:
+  %conv = fptosi half %in to i1 ; expected to lower to an equality compare of %in against -1.0 (0xbc00 as f16)
+  store i1 %conv, i1 addrspace(1)* %out
+  ret void
+}

diff  --git a/llvm/test/CodeGen/AMDGPU/fptoui.f16.ll b/llvm/test/CodeGen/AMDGPU/fptoui.f16.ll
index 11c946b308b0..8bec6d795b99 100644
--- a/llvm/test/CodeGen/AMDGPU/fptoui.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fptoui.f16.ll
@@ -130,3 +130,16 @@ entry:
   store <2 x i64> %r.val, <2 x i64> addrspace(1)* %r
   ret void
 }
+
+; GCN-LABEL: {{^}}fptoui_f16_to_i1:
+; SI: v_cvt_f32_f16_e32 v{{[0-9]+}}, s{{[0-9]+}}
+; SI: v_cmp_eq_f32_e32 vcc, 1.0, v{{[0-9]+}}
+; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
+; VI: v_cmp_eq_f16_e64 [[CC:s\[[0-9]+:[0-9]+\]]], 1.0, s{{[0-9]+}}
+; VI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, [[CC]]
+define amdgpu_kernel void @fptoui_f16_to_i1(i1 addrspace(1)* %out, half %in) {
+entry:
+  %conv = fptoui half %in to i1 ; expected to lower to an equality compare of %in against 1.0
+  store i1 %conv, i1 addrspace(1)* %out
+  ret void
+}


        


More information about the llvm-commits mailing list