[llvm] r251632 - AMDGPU/SI: handle undef for llvm.SI.packf16
Marek Olsak via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 29 08:29:10 PDT 2015
Author: mareko
Date: Thu Oct 29 10:29:09 2015
New Revision: 251632
URL: http://llvm.org/viewvc/llvm-project?rev=251632&view=rev
Log:
AMDGPU/SI: handle undef for llvm.SI.packf16
Added:
llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.packf16.ll
Modified:
llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=251632&r1=251631&r2=251632&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Thu Oct 29 10:29:09 2015
@@ -1091,6 +1091,10 @@ SDValue SITargetLowering::LowerINTRINSIC
DAG.getConstant(2, DL, MVT::i32), // P0
Op.getOperand(1), Op.getOperand(2), Glue);
}
+ case AMDGPUIntrinsic::SI_packf16:
+ if (Op.getOperand(1).isUndef() && Op.getOperand(2).isUndef())
+ return DAG.getUNDEF(MVT::i32);
+ return Op;
case AMDGPUIntrinsic::SI_fs_interp: {
SDValue IJ = Op.getOperand(4);
SDValue I = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, IJ,
Added: llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.packf16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.packf16.ll?rev=251632&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.packf16.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.packf16.ll Thu Oct 29 10:29:09 2015
@@ -0,0 +1,29 @@
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+; GCN-LABEL: {{^}}main:
+; GCN: v_cvt_pkrtz_f16_f32
+; GCN: v_cvt_pkrtz_f16_f32
+; GCN-NOT: v_cvt_pkrtz_f16_f32
+
+define void @main(float %src) #0 {
+main_body:
+ %p1 = call i32 @llvm.SI.packf16(float undef, float %src)
+ %p2 = call i32 @llvm.SI.packf16(float %src, float undef)
+ %p3 = call i32 @llvm.SI.packf16(float undef, float undef)
+ %f1 = bitcast i32 %p1 to float
+ %f2 = bitcast i32 %p2 to float
+ %f3 = bitcast i32 %p3 to float
+ call void @llvm.SI.export(i32 15, i32 1, i32 0, i32 0, i32 1, float undef, float %f1, float undef, float %f1)
+ call void @llvm.SI.export(i32 15, i32 1, i32 0, i32 0, i32 1, float undef, float %f2, float undef, float %f2)
+ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float undef, float %f3, float undef, float %f2)
+ ret void
+}
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.SI.packf16(float, float) #1
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
+
+attributes #0 = { "ShaderType"="0" }
+attributes #1 = { nounwind readnone }
More information about the llvm-commits mailing list