[PATCH] D51925: [AMDGPU] Fix issue for zext of f16 to i32

Tue Sep 11 04:47:30 PDT 2018

dstuttard created this revision.
Herald added subscribers: llvm-commits, t-tye, tpr, yaxunl, nhaehnle, wdng, jvesely, kzhuravl, arsenm.

A Vulkan workload exposed a bug in the lowering of a zext of an f16 value to
i32: in a case where the f16 value was produced by v_mad_mixlo_f16, the upper
16 bits of the result register were not cleared.

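Changing the AMDGPUfp16_zext pattern to clear the upper 16 bits (an AND with
0xffff) instead of emitting a plain COPY fixes the problem. The COPY pattern
is now restricted to generations before GFX9.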

V2: Fixed up "Fix issue for zext of f16 to i32"
V3: Fixed fcanonicalize-elimination test


Repository:
  rL LLVM

https://reviews.llvm.org/D51925

Files:
  lib/Target/AMDGPU/SIInstructions.td
  test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
  test/CodeGen/AMDGPU/fptrunc.f16.ll
  test/CodeGen/AMDGPU/mad-mix-lo.ll


Index: test/CodeGen/AMDGPU/mad-mix-lo.ll
===================================================================

--- test/CodeGen/AMDGPU/mad-mix-lo.ll
+++ test/CodeGen/AMDGPU/mad-mix-lo.ll
@@ -286,6 +286,22 @@
   ret <4 x half> %cvt.result
 }
 
+; GCN-LABEL: mixlo_zext:
+; GCN: s_waitcnt
+; GFX9-NEXT: v_mad_mixlo_f16 v0, v0, v1, v2{{$}}
+; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX9-NEXT: s_setpc_b64
+
+; CIVI: v_mac_f32_e32
+; CIVI: v_cvt_f16_f32_e32
+define i32 @mixlo_zext(float %src0, float %src1, float %src2) #0 {
+  %result = call float @llvm.fmuladd.f32(float %src0, float %src1, float %src2)
+  %cvt.result = fptrunc float %result to half
+  %cvt.result.i16 = bitcast half %cvt.result to i16
+  %cvt.result.i32 = zext i16 %cvt.result.i16 to i32
+  ret i32 %cvt.result.i32
+}
+
 declare half @llvm.minnum.f16(half, half) #1
 declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>) #1
 declare <3 x half> @llvm.minnum.v3f16(<3 x half>, <3 x half>) #1
Index: test/CodeGen/AMDGPU/fptrunc.f16.ll
===================================================================
--- test/CodeGen/AMDGPU/fptrunc.f16.ll
+++ test/CodeGen/AMDGPU/fptrunc.f16.ll
@@ -141,7 +141,8 @@
 ; GCN-LABEL: {{^}}fptrunc_f32_to_f16_zext_i32:
 ; GCN: buffer_load_dword v[[A_F32:[0-9]+]]
 ; GCN: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[A_F32]]
-; GCN-NOT: v[[R_F16]]
+; SIVI-NOT: v[[R_F16]]
+; GFX9: v_and_b32_e32 v[[R_F16]], 0xffff, v[[R_F16]]
 ; GCN: buffer_store_dword v[[R_F16]]
 define amdgpu_kernel void @fptrunc_f32_to_f16_zext_i32(
     i32 addrspace(1)* %r,
@@ -158,7 +159,8 @@
 ; GCN-LABEL: {{^}}fptrunc_fabs_f32_to_f16_zext_i32:
 ; GCN: buffer_load_dword v[[A_F32:[0-9]+]]
 ; GCN: v_cvt_f16_f32_e64 v[[R_F16:[0-9]+]], |v[[A_F32]]|
-; GCN-NOT: v[[R_F16]]
+; SIVI-NOT: v[[R_F16]]
+; GFX9: v_and_b32_e32 v[[R_F16]], 0xffff, v[[R_F16]]
 ; GCN: buffer_store_dword v[[R_F16]]
 define amdgpu_kernel void @fptrunc_fabs_f32_to_f16_zext_i32(
     i32 addrspace(1)* %r,
Index: test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
===================================================================
--- test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
+++ test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
@@ -793,8 +793,8 @@
 }
 
 ; GCN-LABEL: {{^}}v_test_canonicalize_insertelement_v2f16:
-; GFX9: v_pk_mul_f16
-; GFX9: v_mul_f16_e32
+; GFX9-DAG: v_pk_mul_f16
+; GFX9-DAG: v_mul_f16_e32
 ; GFX9-NOT: v_max
 ; GFX9-NOT: v_pk_max
 define <2 x half> @v_test_canonicalize_insertelement_v2f16(<2 x half> %vec, half %val, i32 %idx) {
Index: lib/Target/AMDGPU/SIInstructions.td
===================================================================
--- lib/Target/AMDGPU/SIInstructions.td
+++ lib/Target/AMDGPU/SIInstructions.td
@@ -1343,11 +1343,16 @@
 //===----------------------------------------------------------------------===//
 // Miscellaneous Patterns
 //===----------------------------------------------------------------------===//
+let OtherPredicates = [ Predicate<"Subtarget->getGeneration() < AMDGPUSubtarget::GFX9"> ] in {
 def : GCNPat <
   (i32 (AMDGPUfp16_zext f16:$src)),
   (COPY $src)
 >;
-
+}
+def : GCNPat <
+  (i32 (AMDGPUfp16_zext f16:$src)),
+  (V_AND_B32_e64 $src, (V_MOV_B32_e32 (i32 0x0000ffff)))
+>;
 
 def : GCNPat <
   (i32 (trunc i64:$a)),


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D51925.164846.patch
Type: text/x-patch
Size: 3241 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20180911/f943e18a/attachment.bin>