[llvm] 50578cf - [AMDGPU] Add f16 to i1 CodeGen patterns.
Wen-Heng Chung via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 4 09:44:27 PST 2021
Author: Wen-Heng (Jack) Chung
Date: 2021-02-04T11:44:18-06:00
New Revision: 50578cf3392234016410077593aec397f436258d
URL: https://github.com/llvm/llvm-project/commit/50578cf3392234016410077593aec397f436258d
DIFF: https://github.com/llvm/llvm-project/commit/50578cf3392234016410077593aec397f436258d.diff
LOG: [AMDGPU] Add f16 to i1 CodeGen patterns.
Follow patterns used for f32 and f64 types.
Differential Revision: https://reviews.llvm.org/D95964
Added:
Modified:
llvm/lib/Target/AMDGPU/SIInstructions.td
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptosi.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptosi.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptoui.mir
llvm/test/CodeGen/AMDGPU/fptosi.f16.ll
llvm/test/CodeGen/AMDGPU/fptoui.f16.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index ecb875debefd..81cc2a1c15c7 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -1795,6 +1795,8 @@ class FPToI1Pat<Instruction Inst, int KOne, ValueType kone_type, ValueType vt, S
(i1 (Inst 0, (kone_type KOne), $src0_modifiers, $src0, DSTCLAMP.NONE))
>;
+def : FPToI1Pat<V_CMP_EQ_F16_e64, CONST.FP16_ONE, i16, f16, fp_to_uint>;
+def : FPToI1Pat<V_CMP_EQ_F16_e64, CONST.FP16_NEG_ONE, i16, f16, fp_to_sint>;
def : FPToI1Pat<V_CMP_EQ_F32_e64, CONST.FP32_ONE, i32, f32, fp_to_uint>;
def : FPToI1Pat<V_CMP_EQ_F32_e64, CONST.FP32_NEG_ONE, i32, f32, fp_to_sint>;
def : FPToI1Pat<V_CMP_EQ_F64_e64, CONST.FP64_ONE, i64, f64, fp_to_uint>;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptosi.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptosi.mir
index f8171351781a..648efefd4d36 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptosi.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptosi.mir
@@ -130,3 +130,75 @@ body: |
%3:vgpr(s32) = G_FPTOSI %2
$vgpr0 = COPY %3
...
+
+---
+name: fptosi_s16_to_s1_vv
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GCN-LABEL: name: fptosi_s16_to_s1_vv
+ ; GCN: liveins: $vgpr0
+ ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec
+ ; GCN: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec
+ ; GCN: S_ENDPGM 0, implicit %2
+ %0:vgpr(s32) = COPY $vgpr0
+ %1:vgpr(s16) = G_TRUNC %0
+ %2:vgpr(s32) = G_FPTOSI %1
+ %3:vgpr(s1) = G_TRUNC %2
+ S_ENDPGM 0, implicit %3
+...
+
+---
+name: fptosi_s16_to_s1_vs
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0
+
+ ; GCN-LABEL: name: fptosi_s16_to_s1_vs
+ ; GCN: liveins: $sgpr0
+ ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GCN: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec
+ ; GCN: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec
+ ; GCN: S_ENDPGM 0, implicit %2
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s16) = G_TRUNC %0
+ %2:vgpr(s32) = G_FPTOSI %1
+ %3:vgpr(s1) = G_TRUNC %2
+ S_ENDPGM 0, implicit %3
+...
+
+---
+name: fptosi_s16_to_s1_fneg_vv
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GCN-LABEL: name: fptosi_s16_to_s1_fneg_vv
+ ; GCN: liveins: $vgpr0
+ ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
+ ; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec
+ ; GCN: %5:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[V_XOR_B32_e32_]], implicit $mode, implicit $exec
+ ; GCN: %3:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %5, implicit $mode, implicit $exec
+ ; GCN: S_ENDPGM 0, implicit %3
+ %0:vgpr(s32) = COPY $vgpr0
+ %1:vgpr(s16) = G_TRUNC %0
+ %2:vgpr(s16) = G_FNEG %1
+ %3:vgpr(s32) = G_FPTOSI %2
+ %4:vgpr(s1) = G_TRUNC %3
+ S_ENDPGM 0, implicit %4
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir
index e6dd3fbb04a4..85592b46b4f3 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir
@@ -103,3 +103,75 @@ body: |
%3:vgpr(s32) = G_FPTOUI %2
$vgpr0 = COPY %3
...
+
+---
+name: fptoui_s16_to_s1_vv
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GCN-LABEL: name: fptoui_s16_to_s1_vv
+ ; GCN: liveins: $vgpr0
+ ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec
+ ; GCN: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %4, implicit $mode, implicit $exec
+ ; GCN: S_ENDPGM 0, implicit %2
+ %0:vgpr(s32) = COPY $vgpr0
+ %1:vgpr(s16) = G_TRUNC %0
+ %2:vgpr(s32) = G_FPTOUI %1
+ %3:vgpr(s1) = G_TRUNC %2
+ S_ENDPGM 0, implicit %3
+...
+
+---
+name: fptoui_s16_to_s1_vs
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0
+
+ ; GCN-LABEL: name: fptoui_s16_to_s1_vs
+ ; GCN: liveins: $sgpr0
+ ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GCN: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec
+ ; GCN: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %4, implicit $mode, implicit $exec
+ ; GCN: S_ENDPGM 0, implicit %2
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s16) = G_TRUNC %0
+ %2:vgpr(s32) = G_FPTOUI %1
+ %3:vgpr(s1) = G_TRUNC %2
+ S_ENDPGM 0, implicit %3
+...
+
+---
+name: fptoui_s16_to_s1_fneg_vv
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GCN-LABEL: name: fptoui_s16_to_s1_fneg_vv
+ ; GCN: liveins: $vgpr0
+ ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
+ ; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec
+ ; GCN: %5:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[V_XOR_B32_e32_]], implicit $mode, implicit $exec
+ ; GCN: %3:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %5, implicit $mode, implicit $exec
+ ; GCN: S_ENDPGM 0, implicit %3
+ %0:vgpr(s32) = COPY $vgpr0
+ %1:vgpr(s16) = G_TRUNC %0
+ %2:vgpr(s16) = G_FNEG %1
+ %3:vgpr(s32) = G_FPTOUI %2
+ %4:vgpr(s1) = G_TRUNC %3
+ S_ENDPGM 0, implicit %4
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptosi.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptosi.mir
index 74f6f01a4548..3ad600c8c31c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptosi.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptosi.mir
@@ -644,3 +644,27 @@ body: |
%1:_(<2 x s64>) = G_FPTOSI %0
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
...
+
+---
+name: test_fptosi_s16_to_s1
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; SI-LABEL: name: test_fptosi_s16_to_s1
+ ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
+ ; SI: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[FPEXT]](s32)
+ ; SI: [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[FPTOSI]](s32)
+ ; SI: S_ENDPGM 0, implicit [[TRUNC1]](s1)
+ ; VI-LABEL: name: test_fptosi_s16_to_s1
+ ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; VI: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[TRUNC]](s16)
+ ; VI: [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[FPTOSI]](s32)
+ ; VI: S_ENDPGM 0, implicit [[TRUNC1]](s1)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s16) = G_TRUNC %0
+ %2:_(s1) = G_FPTOSI %1
+ S_ENDPGM 0, implicit %2
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptoui.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptoui.mir
index 87482c0b48d8..f731bbf4f9cc 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptoui.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptoui.mir
@@ -814,3 +814,27 @@ body: |
%1:_(<2 x s64>) = G_FPTOUI %0
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
...
+
+---
+name: test_fptoui_s16_to_s1
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; SI-LABEL: name: test_fptoui_s16_to_s1
+ ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
+ ; SI: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FPEXT]](s32)
+ ; SI: [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[FPTOUI]](s32)
+ ; SI: S_ENDPGM 0, implicit [[TRUNC1]](s1)
+ ; VI-LABEL: name: test_fptoui_s16_to_s1
+ ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; VI: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[TRUNC]](s16)
+ ; VI: [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[FPTOUI]](s32)
+ ; VI: S_ENDPGM 0, implicit [[TRUNC1]](s1)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s16) = G_TRUNC %0
+ %2:_(s1) = G_FPTOUI %1
+ S_ENDPGM 0, implicit %2
+...
diff --git a/llvm/test/CodeGen/AMDGPU/fptosi.f16.ll b/llvm/test/CodeGen/AMDGPU/fptosi.f16.ll
index 7fa4f3c32ba8..343444c3cc85 100644
--- a/llvm/test/CodeGen/AMDGPU/fptosi.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fptosi.f16.ll
@@ -132,3 +132,16 @@ entry:
store <2 x i64> %r.val, <2 x i64> addrspace(1)* %r
ret void
}
+
+; GCN-LABEL: {{^}}fptosi_f16_to_i1:
+; SI: v_cvt_f32_f16_e32 v{{[0-9]+}}, s{{[0-9]+}}
+; SI: v_cmp_eq_f32_e32 vcc, -1.0, v{{[0-9]+}}
+; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
+; VI: v_cmp_eq_f16_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], 0xbc00, s{{[0-9]+}}
+; VI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, [[CMP]]
+define amdgpu_kernel void @fptosi_f16_to_i1(i1 addrspace(1)* %out, half %in) {
+entry:
+ %conv = fptosi half %in to i1
+ store i1 %conv, i1 addrspace(1)* %out
+ ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/fptoui.f16.ll b/llvm/test/CodeGen/AMDGPU/fptoui.f16.ll
index 11c946b308b0..8bec6d795b99 100644
--- a/llvm/test/CodeGen/AMDGPU/fptoui.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fptoui.f16.ll
@@ -130,3 +130,16 @@ entry:
store <2 x i64> %r.val, <2 x i64> addrspace(1)* %r
ret void
}
+
+; GCN-LABEL: {{^}}fptoui_f16_to_i1:
+; SI: v_cvt_f32_f16_e32 v{{[0-9]+}}, s{{[0-9]+}}
+; SI: v_cmp_eq_f32_e32 vcc, 1.0, v{{[0-9]+}}
+; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
+; VI: v_cmp_eq_f16_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], 1.0, s{{[0-9]+}}
+; VI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, [[CMP]]
+define amdgpu_kernel void @fptoui_f16_to_i1(i1 addrspace(1)* %out, half %in) {
+entry:
+ %conv = fptoui half %in to i1
+ store i1 %conv, i1 addrspace(1)* %out
+ ret void
+}
More information about the llvm-commits mailing list