[PATCH] D51925: [AMDGPU] Fix issue for zext of f16 to i32
David Stuttard via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 11 04:47:30 PDT 2018
dstuttard created this revision.
Herald added subscribers: llvm-commits, t-tye, tpr, yaxunl, nhaehnle, wdng, jvesely, kzhuravl, arsenm.
Vulkan exposed an issue with the zext of f16 to i32 in a case using
v_mad_mixlo_f16, where the upper 16 bits of the result were not cleared.
Changing the AMDGPUfp16_zext pattern to clear the upper bits (AND with 0xffff)
instead of just copying the register fixed the problem.
V2: Fixed up "Fix issue for zext of f16 to i32"
V3: Fixed fcanonicalize-elimination test
Repository:
rL LLVM
https://reviews.llvm.org/D51925
Files:
lib/Target/AMDGPU/SIInstructions.td
test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
test/CodeGen/AMDGPU/fptrunc.f16.ll
test/CodeGen/AMDGPU/mad-mix-lo.ll
Index: test/CodeGen/AMDGPU/mad-mix-lo.ll
===================================================================
--- test/CodeGen/AMDGPU/mad-mix-lo.ll
+++ test/CodeGen/AMDGPU/mad-mix-lo.ll
@@ -286,6 +286,22 @@
ret <4 x half> %cvt.result
}
+; GCN-LABEL: mixlo_zext:
+; GCN: s_waitcnt
+; GFX9-NEXT: v_mad_mixlo_f16 v0, v0, v1, v2{{$}}
+; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX9-NEXT: s_setpc_b64
+
+; CIVI: v_mac_f32_e32
+; CIVI: v_cvt_f16_f32_e32
+define i32 @mixlo_zext(float %src0, float %src1, float %src2) #0 {
+ %result = call float @llvm.fmuladd.f32(float %src0, float %src1, float %src2)
+ %cvt.result = fptrunc float %result to half
+ %cvt.result.i16 = bitcast half %cvt.result to i16
+ %cvt.result.i32 = zext i16 %cvt.result.i16 to i32
+ ret i32 %cvt.result.i32
+}
+
declare half @llvm.minnum.f16(half, half) #1
declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>) #1
declare <3 x half> @llvm.minnum.v3f16(<3 x half>, <3 x half>) #1
Index: test/CodeGen/AMDGPU/fptrunc.f16.ll
===================================================================
--- test/CodeGen/AMDGPU/fptrunc.f16.ll
+++ test/CodeGen/AMDGPU/fptrunc.f16.ll
@@ -141,7 +141,8 @@
; GCN-LABEL: {{^}}fptrunc_f32_to_f16_zext_i32:
; GCN: buffer_load_dword v[[A_F32:[0-9]+]]
; GCN: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[A_F32]]
-; GCN-NOT: v[[R_F16]]
+; SIVI-NOT: v[[R_F16]]
+; GFX9: v_and_b32_e32 v[[R_F16]], 0xffff, v[[R_F16]]
; GCN: buffer_store_dword v[[R_F16]]
define amdgpu_kernel void @fptrunc_f32_to_f16_zext_i32(
i32 addrspace(1)* %r,
@@ -158,7 +159,8 @@
; GCN-LABEL: {{^}}fptrunc_fabs_f32_to_f16_zext_i32:
; GCN: buffer_load_dword v[[A_F32:[0-9]+]]
; GCN: v_cvt_f16_f32_e64 v[[R_F16:[0-9]+]], |v[[A_F32]]|
-; GCN-NOT: v[[R_F16]]
+; SIVI-NOT: v[[R_F16]]
+; GFX9: v_and_b32_e32 v[[R_F16]], 0xffff, v[[R_F16]]
; GCN: buffer_store_dword v[[R_F16]]
define amdgpu_kernel void @fptrunc_fabs_f32_to_f16_zext_i32(
i32 addrspace(1)* %r,
Index: test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
===================================================================
--- test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
+++ test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
@@ -793,8 +793,8 @@
}
; GCN-LABEL: {{^}}v_test_canonicalize_insertelement_v2f16:
-; GFX9: v_pk_mul_f16
-; GFX9: v_mul_f16_e32
+; GFX9-DAG: v_pk_mul_f16
+; GFX9-DAG: v_mul_f16_e32
; GFX9-NOT: v_max
; GFX9-NOT: v_pk_max
define <2 x half> @v_test_canonicalize_insertelement_v2f16(<2 x half> %vec, half %val, i32 %idx) {
Index: lib/Target/AMDGPU/SIInstructions.td
===================================================================
--- lib/Target/AMDGPU/SIInstructions.td
+++ lib/Target/AMDGPU/SIInstructions.td
@@ -1343,11 +1343,16 @@
//===----------------------------------------------------------------------===//
// Miscellaneous Patterns
//===----------------------------------------------------------------------===//
+let OtherPredicates = [ Predicate<"Subtarget->getGeneration() < AMDGPUSubtarget::GFX9"> ] in {
def : GCNPat <
(i32 (AMDGPUfp16_zext f16:$src)),
(COPY $src)
>;
-
+}
+def : GCNPat <
+ (i32 (AMDGPUfp16_zext f16:$src)),
+ (V_AND_B32_e64 $src, (V_MOV_B32_e32 (i32 0x0000ffff)))
+>;
def : GCNPat <
(i32 (trunc i64:$a)),
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D51925.164846.patch
Type: text/x-patch
Size: 3241 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20180911/f943e18a/attachment.bin>
More information about the llvm-commits mailing list.