[llvm] r349694 - AMDGPU: Add patterns for v4i16/v4f16 -> v4i16/v4f16 bitcasts

Wed Dec 19 14:53:33 PST 2018

Author: pendingchaos
Date: Wed Dec 19 14:53:33 2018
New Revision: 349694

URL: http://llvm.org/viewvc/llvm-project?rev=349694&view=rev
Log:
AMDGPU: Add patterns for v4i16/v4f16 -> v4i16/v4f16 bitcasts

Reviewers: arsenm, tstellar

Reviewed By: arsenm

Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, llvm-commits

Differential Revision: https://reviews.llvm.org/D55058

Added:
    llvm/trunk/test/CodeGen/AMDGPU/bitcast-v4f16-v4i16.ll
Modified:
    llvm/trunk/lib/Target/AMDGPU/SIInstructions.td

Modified: llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstructions.td?rev=349694&r1=349693&r2=349694&view=diff
==============================================================================

--- llvm/trunk/lib/Target/AMDGPU/SIInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstructions.td Wed Dec 19 14:53:33 2018
@@ -852,6 +852,8 @@ def : BitConvert <f64, v2f32, VReg_64>;
 def : BitConvert <v2f32, f64, VReg_64>;
 def : BitConvert <f64, v2i32, VReg_64>;
 def : BitConvert <v2i32, f64, VReg_64>;
+def : BitConvert <v4i16, v4f16, VReg_64>;
+def : BitConvert <v4f16, v4i16, VReg_64>;
 
 // FIXME: Make SGPR
 def : BitConvert <v2i32, v4f16, VReg_64>;

Added: llvm/trunk/test/CodeGen/AMDGPU/bitcast-v4f16-v4i16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/bitcast-v4f16-v4i16.ll?rev=349694&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/bitcast-v4f16-v4i16.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/bitcast-v4f16-v4i16.ll Wed Dec 19 14:53:33 2018
@@ -0,0 +1,35 @@
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope %s
+
+; Creating v4i16->v4f16 and v4f16->v4i16 bitcasts in the selection DAG is rather
+; difficult, so this test uses llvm.amdgcn.wqm calls to produce them.
+
+; CHECK-LABEL: {{^}}test_to_i16:
+; CHECK: s_endpgm
+define amdgpu_ps void @test_to_i16(<4 x i32> inreg, <4 x half> inreg) #0 {
+  %a_tmp = call <4 x half> @llvm.amdgcn.wqm.v4f16(<4 x half> %1)
+  %a_i16_tmp = bitcast <4 x half> %a_tmp to <4 x i16> ; the v4f16 -> v4i16 bitcast under test
+  %a_i16 = call <4 x i16> @llvm.amdgcn.wqm.v4i16(<4 x i16> %a_i16_tmp)
+
+  %a_i32 = bitcast <4 x i16> %a_i16 to <2 x i32> ; repack as v2i32 for the buffer store below
+  call void @llvm.amdgcn.raw.buffer.store.v2i32(<2 x i32> %a_i32, <4 x i32> %0, i32 0, i32 0, i32 0)
+  ret void
+}
+
+; CHECK-LABEL: {{^}}test_to_half:
+; CHECK: s_endpgm
+define amdgpu_ps void @test_to_half(<4 x i32> inreg, <4 x i16> inreg) #0 {
+  %a_tmp = call <4 x i16> @llvm.amdgcn.wqm.v4i16(<4 x i16> %1)
+  %a_half_tmp = bitcast <4 x i16> %a_tmp to <4 x half> ; the v4i16 -> v4f16 bitcast under test
+  %a_half = call <4 x half> @llvm.amdgcn.wqm.v4f16(<4 x half> %a_half_tmp)
+
+  %a_i32 = bitcast <4 x half> %a_half to <2 x i32> ; repack as v2i32 for the buffer store below
+  call void @llvm.amdgcn.raw.buffer.store.v2i32(<2 x i32> %a_i32, <4 x i32> %0, i32 0, i32 0, i32 0)
+  ret void
+}
+
+declare <4 x half> @llvm.amdgcn.wqm.v4f16(<4 x half>) #1
+declare <4 x i16> @llvm.amdgcn.wqm.v4i16(<4 x i16>) #1
+declare void @llvm.amdgcn.raw.buffer.store.v2i32(<2 x i32>, <4 x i32>, i32, i32, i32) #0
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readonly }