[llvm] r339511 - AMDGPU: Fix packing undef parts of build_vector
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Sun Aug 12 01:42:46 PDT 2018
Author: arsenm
Date: Sun Aug 12 01:42:46 2018
New Revision: 339511
URL: http://llvm.org/viewvc/llvm-project?rev=339511&view=rev
Log:
AMDGPU: Fix packing undef parts of build_vector
Added:
llvm/trunk/test/CodeGen/AMDGPU/build-vector-packed-partial-undef.ll
Modified:
llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
llvm/trunk/test/CodeGen/AMDGPU/call-argument-types.ll
llvm/trunk/test/CodeGen/AMDGPU/mad-mix-hi.ll
Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=339511&r1=339510&r2=339511&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Sun Aug 12 01:42:46 2018
@@ -4287,21 +4287,30 @@ SDValue SITargetLowering::lowerBUILD_VEC
}
assert(VT == MVT::v2f16 || VT == MVT::v2i16);
+ assert(!Subtarget->hasVOP3PInsts() && "this should be legal");
SDValue Lo = Op.getOperand(0);
SDValue Hi = Op.getOperand(1);
- Lo = DAG.getNode(ISD::BITCAST, SL, MVT::i16, Lo);
- Hi = DAG.getNode(ISD::BITCAST, SL, MVT::i16, Hi);
+ // Avoid adding defined bits with the zero_extend.
+ if (Hi.isUndef()) {
+ Lo = DAG.getNode(ISD::BITCAST, SL, MVT::i16, Lo);
+ SDValue ExtLo = DAG.getNode(ISD::ANY_EXTEND, SL, MVT::i32, Lo);
+ return DAG.getNode(ISD::BITCAST, SL, VT, ExtLo);
+ }
- Lo = DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i32, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, SL, MVT::i16, Hi);
Hi = DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i32, Hi);
SDValue ShlHi = DAG.getNode(ISD::SHL, SL, MVT::i32, Hi,
DAG.getConstant(16, SL, MVT::i32));
+ if (Lo.isUndef())
+ return DAG.getNode(ISD::BITCAST, SL, VT, ShlHi);
- SDValue Or = DAG.getNode(ISD::OR, SL, MVT::i32, Lo, ShlHi);
+ Lo = DAG.getNode(ISD::BITCAST, SL, MVT::i16, Lo);
+ Lo = DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i32, Lo);
+ SDValue Or = DAG.getNode(ISD::OR, SL, MVT::i32, Lo, ShlHi);
return DAG.getNode(ISD::BITCAST, SL, VT, Or);
}
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstructions.td?rev=339511&r1=339510&r2=339511&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstructions.td Sun Aug 12 01:42:46 2018
@@ -1461,13 +1461,32 @@ class ExpPattern<SDPatternOperator node,
def : ExpPattern<AMDGPUexport, i32, EXP>;
def : ExpPattern<AMDGPUexport_done, i32, EXP_DONE>;
-// COPY_TO_REGCLASS is workaround tablegen bug from multiple outputs
+// COPY is workaround tablegen bug from multiple outputs
// from S_LSHL_B32's multiple outputs from implicit scc def.
def : GCNPat <
(v2i16 (build_vector (i16 0), i16:$src1)),
- (v2i16 (COPY_TO_REGCLASS (S_LSHL_B32 i16:$src1, (i16 16)), SReg_32_XM0))
+ (v2i16 (COPY (S_LSHL_B32 i16:$src1, (i16 16))))
>;
+def : GCNPat <
+ (v2i16 (build_vector i16:$src0, (i16 undef))),
+ (v2i16 (COPY $src0))
+>;
+
+def : GCNPat <
+ (v2f16 (build_vector f16:$src0, (f16 undef))),
+ (v2f16 (COPY $src0))
+>;
+
+def : GCNPat <
+ (v2i16 (build_vector (i16 undef), i16:$src1)),
+ (v2i16 (COPY (S_LSHL_B32 $src1, (i32 16))))
+>;
+
+def : GCNPat <
+ (v2f16 (build_vector (f16 undef), f16:$src1)),
+ (v2f16 (COPY (S_LSHL_B32 $src1, (i32 16))))
+>;
let SubtargetPredicate = HasVOP3PInsts in {
def : GCNPat <
Added: llvm/trunk/test/CodeGen/AMDGPU/build-vector-packed-partial-undef.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/build-vector-packed-partial-undef.ll?rev=339511&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/build-vector-packed-partial-undef.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/build-vector-packed-partial-undef.ll Sun Aug 12 01:42:46 2018
@@ -0,0 +1,380 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX9 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX8 %s
+
+define void @undef_lo_v2i16(i16 %arg0) {
+; GFX9-LABEL: undef_lo_v2i16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use v0
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: undef_lo_v2i16:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX8-NEXT: ;;#ASMSTART
+; GFX8-NEXT: ; use v0
+; GFX8-NEXT: ;;#ASMEND
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+ %undef.lo = insertelement <2 x i16> undef, i16 %arg0, i32 1
+ call void asm sideeffect "; use $0", "v"(<2 x i16> %undef.lo);
+ ret void
+}
+
+define void @undef_lo_v2f16(half %arg0) {
+; GFX9-LABEL: undef_lo_v2f16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use v0
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: undef_lo_v2f16:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX8-NEXT: ;;#ASMSTART
+; GFX8-NEXT: ; use v0
+; GFX8-NEXT: ;;#ASMEND
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+ %undef.lo = insertelement <2 x half> undef, half %arg0, i32 1
+ call void asm sideeffect "; use $0", "v"(<2 x half> %undef.lo);
+ ret void
+}
+
+define void @undef_lo_op_v2f16(half %arg0) {
+; GFX9-LABEL: undef_lo_op_v2f16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX9-NEXT: v_pk_add_f16 v0, v0, 1.0 op_sel_hi:[1,0]
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use v0
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: undef_lo_op_v2f16:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_mov_b32_e32 v1, 0x3c00
+; GFX8-NEXT: v_add_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT: v_or_b32_e32 v0, 0x7e00, v0
+; GFX8-NEXT: ;;#ASMSTART
+; GFX8-NEXT: ; use v0
+; GFX8-NEXT: ;;#ASMEND
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+ %undef.lo = insertelement <2 x half> undef, half %arg0, i32 1
+ %op = fadd <2 x half> %undef.lo, <half 1.0, half 1.0>
+ call void asm sideeffect "; use $0", "v"(<2 x half> %op);
+ ret void
+}
+
+define void @undef_lo_op_v2i16(i16 %arg0) {
+; GFX9-LABEL: undef_lo_op_v2i16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX9-NEXT: s_movk_i32 s6, 0x63
+; GFX9-NEXT: v_pk_add_u16 v0, v0, s6 op_sel_hi:[1,0]
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use v0
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: undef_lo_op_v2i16:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_mov_b32_e32 v1, 0x63
+; GFX8-NEXT: v_add_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT: ;;#ASMSTART
+; GFX8-NEXT: ; use v0
+; GFX8-NEXT: ;;#ASMEND
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+ %undef.lo = insertelement <2 x i16> undef, i16 %arg0, i32 1
+ %op = add <2 x i16> %undef.lo, <i16 99, i16 99>
+ call void asm sideeffect "; use $0", "v"(<2 x i16> %op);
+ ret void
+}
+
+define void @undef_lo3_v4i16(i16 %arg0) {
+; GFX9-LABEL: undef_lo3_v4i16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use v[0:1]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: undef_lo3_v4i16:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX8-NEXT: ;;#ASMSTART
+; GFX8-NEXT: ; use v[0:1]
+; GFX8-NEXT: ;;#ASMEND
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+ %undef.lo = insertelement <4 x i16> undef, i16 %arg0, i32 1
+ call void asm sideeffect "; use $0", "v"(<4 x i16> %undef.lo);
+ ret void
+}
+
+define void @undef_lo3_v4f16(half %arg0) {
+; GFX9-LABEL: undef_lo3_v4f16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use v[0:1]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: undef_lo3_v4f16:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX8-NEXT: ;;#ASMSTART
+; GFX8-NEXT: ; use v[0:1]
+; GFX8-NEXT: ;;#ASMEND
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+ %undef.lo = insertelement <4 x half> undef, half %arg0, i32 1
+ call void asm sideeffect "; use $0", "v"(<4 x half> %undef.lo);
+ ret void
+}
+
+define void @undef_lo2_v4i16(<2 x i16> %arg0) {
+; GFX9-LABEL: undef_lo2_v4i16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX9-NEXT: v_mov_b32_e32 v2, 0xffff0000
+; GFX9-NEXT: v_and_or_b32 v0, v0, v2, v1
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use v[0:1]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: undef_lo2_v4i16:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0
+; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v0
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX8-NEXT: ;;#ASMSTART
+; GFX8-NEXT: ; use v[0:1]
+; GFX8-NEXT: ;;#ASMEND
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+ %undef.lo = shufflevector <2 x i16> %arg0, <2 x i16> undef, <4 x i32> <i32 1, i32 1, i32 2, i32 3>
+ call void asm sideeffect "; use $0", "v"(<4 x i16> %undef.lo);
+ ret void
+}
+
+define void @undef_lo2_v4f16(<2 x half> %arg0) {
+; GFX9-LABEL: undef_lo2_v4f16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0
+; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v0
+; GFX9-NEXT: v_lshl_or_b32 v0, v0, 16, v1
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use v[0:1]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: undef_lo2_v4f16:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0
+; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v0
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX8-NEXT: ;;#ASMSTART
+; GFX8-NEXT: ; use v[0:1]
+; GFX8-NEXT: ;;#ASMEND
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+ %undef.lo = shufflevector <2 x half> %arg0, <2 x half> undef, <4 x i32> <i32 1, i32 1, i32 2, i32 3>
+ call void asm sideeffect "; use $0", "v"(<4 x half> %undef.lo);
+ ret void
+}
+
+define void @undef_hi_v2i16(i16 %arg0) {
+; GFX9-LABEL: undef_hi_v2i16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use v0
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: undef_hi_v2i16:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: ;;#ASMSTART
+; GFX8-NEXT: ; use v0
+; GFX8-NEXT: ;;#ASMEND
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+ %undef.hi = insertelement <2 x i16> undef, i16 %arg0, i32 0
+ call void asm sideeffect "; use $0", "v"(<2 x i16> %undef.hi);
+ ret void
+}
+
+define void @undef_hi_v2f16(half %arg0) {
+; GFX9-LABEL: undef_hi_v2f16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use v0
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: undef_hi_v2f16:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: ;;#ASMSTART
+; GFX8-NEXT: ; use v0
+; GFX8-NEXT: ;;#ASMEND
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+ %undef.hi = insertelement <2 x half> undef, half %arg0, i32 0
+ call void asm sideeffect "; use $0", "v"(<2 x half> %undef.hi);
+ ret void
+}
+
+define void @undef_hi_op_v2f16(half %arg0) {
+; GFX9-LABEL: undef_hi_op_v2f16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_pk_add_f16 v0, v0, 1.0 op_sel_hi:[1,0]
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use v0
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: undef_hi_op_v2f16:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_add_f16_e32 v0, 1.0, v0
+; GFX8-NEXT: v_or_b32_e32 v0, 0x7e000000, v0
+; GFX8-NEXT: ;;#ASMSTART
+; GFX8-NEXT: ; use v0
+; GFX8-NEXT: ;;#ASMEND
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+ %undef.hi = insertelement <2 x half> undef, half %arg0, i32 0
+ %op = fadd <2 x half> %undef.hi, <half 1.0, half 1.0>
+ call void asm sideeffect "; use $0", "v"(<2 x half> %op);
+ ret void
+}
+
+define void @undef_hi_op_v2i16(i16 %arg0) {
+; GFX9-LABEL: undef_hi_op_v2i16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_movk_i32 s6, 0x63
+; GFX9-NEXT: v_pk_add_u16 v0, v0, s6 op_sel_hi:[1,0]
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use v0
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: undef_hi_op_v2i16:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_add_u16_e32 v0, 0x63, v0
+; GFX8-NEXT: ;;#ASMSTART
+; GFX8-NEXT: ; use v0
+; GFX8-NEXT: ;;#ASMEND
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+ %undef.hi = insertelement <2 x i16> undef, i16 %arg0, i32 0
+ %op = add <2 x i16> %undef.hi, <i16 99, i16 99>
+ call void asm sideeffect "; use $0", "v"(<2 x i16> %op);
+ ret void
+}
+
+define void @undef_hi3_v4i16(i16 %arg0) {
+; GFX9-LABEL: undef_hi3_v4i16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use v[0:1]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: undef_hi3_v4i16:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: ;;#ASMSTART
+; GFX8-NEXT: ; use v[0:1]
+; GFX8-NEXT: ;;#ASMEND
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+ %undef.hi = insertelement <4 x i16> undef, i16 %arg0, i32 0
+ call void asm sideeffect "; use $0", "v"(<4 x i16> %undef.hi);
+ ret void
+}
+
+define void @undef_hi3_v4f16(half %arg0) {
+; GFX9-LABEL: undef_hi3_v4f16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use v[0:1]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: undef_hi3_v4f16:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: ;;#ASMSTART
+; GFX8-NEXT: ; use v[0:1]
+; GFX8-NEXT: ;;#ASMEND
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+ %undef.hi = insertelement <4 x half> undef, half %arg0, i32 0
+ call void asm sideeffect "; use $0", "v"(<4 x half> %undef.hi);
+ ret void
+}
+
+define void @undef_hi2_v4i16(<2 x i16> %arg0) {
+; GFX9-LABEL: undef_hi2_v4i16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use v[0:1]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: undef_hi2_v4i16:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: ;;#ASMSTART
+; GFX8-NEXT: ; use v[0:1]
+; GFX8-NEXT: ;;#ASMEND
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+ %undef.hi = shufflevector <2 x i16> %arg0, <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ call void asm sideeffect "; use $0", "v"(<4 x i16> %undef.hi);
+ ret void
+}
+
+define void @undef_hi2_v4f16(<2 x half> %arg0) {
+; GFX9-LABEL: undef_hi2_v4f16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use v[0:1]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: undef_hi2_v4f16:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: ;;#ASMSTART
+; GFX8-NEXT: ; use v[0:1]
+; GFX8-NEXT: ;;#ASMEND
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+ %undef.hi = shufflevector <2 x half> %arg0, <2 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ call void asm sideeffect "; use $0", "v"(<4 x half> %undef.hi);
+ ret void
+}
+
Modified: llvm/trunk/test/CodeGen/AMDGPU/call-argument-types.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/call-argument-types.ll?rev=339511&r1=339510&r2=339511&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/call-argument-types.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/call-argument-types.ll Sun Aug 12 01:42:46 2018
@@ -402,9 +402,9 @@ define amdgpu_kernel void @test_call_ext
; FIXME: materialize constant directly in VGPR
; GCN-LABEL: {{^}}test_call_external_void_func_v3i16_imm:
; GFX9-DAG: s_mov_b32 [[K01:s[0-9]+]], 0x20001
-; GFX9-DAG: s_pack_ll_b32_b16 [[K23:s[0-9]+]], 3, s{{[0-9]+}}
+; GFX9-DAG: s_mov_b32 [[K2:s[0-9]+]], 3
; GFX9: v_mov_b32_e32 v0, [[K01]]
-; GFX9: v_mov_b32_e32 v1, [[K23]]
+; GFX9: v_mov_b32_e32 v1, [[K2]]
; GFX9: s_swappc_b64
define amdgpu_kernel void @test_call_external_void_func_v3i16_imm() #0 {
call void @external_void_func_v3i16(<3 x i16> <i16 1, i16 2, i16 3>)
Modified: llvm/trunk/test/CodeGen/AMDGPU/mad-mix-hi.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/mad-mix-hi.ll?rev=339511&r1=339510&r2=339511&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/mad-mix-hi.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/mad-mix-hi.ll Sun Aug 12 01:42:46 2018
@@ -83,8 +83,10 @@ define i32 @v_mad_mixhi_f16_f16lo_f16lo_
}
; GCN-LABEL: {{^}}v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_precvt:
-; GFX9: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] clamp{{$}}
-; GFX9: v_cvt_f16_f32_e32 v0, v0
+; GCN: s_waitcnt
+; GFX9-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] clamp{{$}}
+; GFX9-NEXT: v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; GFX9-NEXT: s_setpc_b64
define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_precvt(half %src0, half %src1, half %src2) #0 {
%src0.ext = fpext half %src0 to float
%src1.ext = fpext half %src1 to float
More information about the llvm-commits mailing list