[llvm] r316427 - AMDGPU: Add new intrinsic llvm.amdgcn.kill(i1)
Marek Olsak via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 24 03:27:13 PDT 2017
Author: mareko
Date: Tue Oct 24 03:27:13 2017
New Revision: 316427
URL: http://llvm.org/viewvc/llvm-project?rev=316427&view=rev
Log:
AMDGPU: Add new intrinsic llvm.amdgcn.kill(i1)
Summary:
Kill the thread if operand 0 == false.
llvm.amdgcn.wqm.vote can be applied to the operand.
Also allow kill in all shader stages.
Reviewers: arsenm, nhaehnle
Subscribers: kzhuravl, wdng, yaxunl, dstuttard, tpr, t-tye, llvm-commits
Differential Revision: https://reviews.llvm.org/D38544
Added:
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll
Modified:
llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td
llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td
llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/trunk/lib/Target/AMDGPU/SIInsertSkips.cpp
llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h
llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
llvm/trunk/lib/Target/AMDGPU/SILowerControlFlow.cpp
llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
llvm/trunk/test/CodeGen/AMDGPU/insert-skips-kill-uncond.mir
llvm/trunk/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
Modified: llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td?rev=316427&r1=316426&r2=316427&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td (original)
+++ llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td Tue Oct 24 03:27:13 2017
@@ -753,6 +753,9 @@ def int_amdgcn_wqm_vote : Intrinsic<[llv
[llvm_i1_ty], [IntrNoMem, IntrConvergent]
>;
+// If false, set EXEC=0 for the current thread until the end of program.
+def int_amdgcn_kill : Intrinsic<[], [llvm_i1_ty], []>;
+
// Copies the active channels of the source value to the destination value,
// with the guarantee that the source value is computed as if the entire
// program were executed in Whole Wavefront Mode, i.e. with all channels
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td?rev=316427&r1=316426&r2=316427&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td Tue Oct 24 03:27:13 2017
@@ -167,7 +167,6 @@ def COND_OLE : PatLeaf <
[{return N->get() == ISD::SETOLE || N->get() == ISD::SETLE;}]
>;
-
def COND_O : PatLeaf <(cond), [{return N->get() == ISD::SETO;}]>;
def COND_UO : PatLeaf <(cond), [{return N->get() == ISD::SETUO;}]>;
Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=316427&r1=316426&r2=316427&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Tue Oct 24 03:27:13 2017
@@ -2449,7 +2449,7 @@ MachineBasicBlock *SITargetLowering::spl
if (SplitPoint == BB->end()) {
// Don't bother with a new block.
- MI.setDesc(TII->get(AMDGPU::SI_KILL_TERMINATOR));
+ MI.setDesc(TII->getKillTerminatorFromPseudo(MI.getOpcode()));
return BB;
}
@@ -2463,7 +2463,7 @@ MachineBasicBlock *SITargetLowering::spl
SplitBB->transferSuccessorsAndUpdatePHIs(BB);
BB->addSuccessor(SplitBB);
- MI.setDesc(TII->get(AMDGPU::SI_KILL_TERMINATOR));
+ MI.setDesc(TII->getKillTerminatorFromPseudo(MI.getOpcode()));
return SplitBB;
}
@@ -3017,7 +3017,8 @@ MachineBasicBlock *SITargetLowering::Emi
case AMDGPU::SI_INDIRECT_DST_V8:
case AMDGPU::SI_INDIRECT_DST_V16:
return emitIndirectDst(MI, *BB, *getSubtarget());
- case AMDGPU::SI_KILL:
+ case AMDGPU::SI_KILL_F32_COND_IMM_PSEUDO:
+ case AMDGPU::SI_KILL_I1_PSEUDO:
return splitKillBlock(MI, BB);
case AMDGPU::V_CNDMASK_B64_PSEUDO: {
MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
Modified: llvm/trunk/lib/Target/AMDGPU/SIInsertSkips.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInsertSkips.cpp?rev=316427&r1=316426&r2=316427&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInsertSkips.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInsertSkips.cpp Tue Oct 24 03:27:13 2017
@@ -200,25 +200,101 @@ bool SIInsertSkips::skipIfDead(MachineIn
void SIInsertSkips::kill(MachineInstr &MI) {
MachineBasicBlock &MBB = *MI.getParent();
DebugLoc DL = MI.getDebugLoc();
- const MachineOperand &Op = MI.getOperand(0);
-#ifndef NDEBUG
- CallingConv::ID CallConv = MBB.getParent()->getFunction()->getCallingConv();
- // Kill is only allowed in pixel / geometry shaders.
- assert(CallConv == CallingConv::AMDGPU_PS ||
- CallConv == CallingConv::AMDGPU_GS);
-#endif
- // Clear this thread from the exec mask if the operand is negative.
- if (Op.isImm()) {
- // Constant operand: Set exec mask to 0 or do nothing
- if (Op.getImm() & 0x80000000) {
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
- .addImm(0);
+ switch (MI.getOpcode()) {
+ case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR: {
+ unsigned Opcode = 0;
+
+ // The opcodes are inverted because the inline immediate has to be
+ // the first operand, e.g. from "x < imm" to "imm > x"
+ switch (MI.getOperand(2).getImm()) {
+ case ISD::SETOEQ:
+ case ISD::SETEQ:
+ Opcode = AMDGPU::V_CMPX_EQ_F32_e32;
+ break;
+ case ISD::SETOGT:
+ case ISD::SETGT:
+ Opcode = AMDGPU::V_CMPX_LT_F32_e32;
+ break;
+ case ISD::SETOGE:
+ case ISD::SETGE:
+ Opcode = AMDGPU::V_CMPX_LE_F32_e32;
+ break;
+ case ISD::SETOLT:
+ case ISD::SETLT:
+ Opcode = AMDGPU::V_CMPX_GT_F32_e32;
+ break;
+ case ISD::SETOLE:
+ case ISD::SETLE:
+ Opcode = AMDGPU::V_CMPX_GE_F32_e32;
+ break;
+ case ISD::SETONE:
+ case ISD::SETNE:
+ Opcode = AMDGPU::V_CMPX_LG_F32_e32;
+ break;
+ case ISD::SETO:
+ Opcode = AMDGPU::V_CMPX_O_F32_e32;
+ break;
+ case ISD::SETUO:
+ Opcode = AMDGPU::V_CMPX_U_F32_e32;
+ break;
+ case ISD::SETUEQ:
+ Opcode = AMDGPU::V_CMPX_NLG_F32_e32;
+ break;
+ case ISD::SETUGT:
+ Opcode = AMDGPU::V_CMPX_NGE_F32_e32;
+ break;
+ case ISD::SETUGE:
+ Opcode = AMDGPU::V_CMPX_NGT_F32_e32;
+ break;
+ case ISD::SETULT:
+ Opcode = AMDGPU::V_CMPX_NLE_F32_e32;
+ break;
+ case ISD::SETULE:
+ Opcode = AMDGPU::V_CMPX_NLT_F32_e32;
+ break;
+ case ISD::SETUNE:
+ Opcode = AMDGPU::V_CMPX_NEQ_F32_e32;
+ break;
+ default:
+ llvm_unreachable("invalid ISD:SET cond code");
}
- } else {
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMPX_LE_F32_e32))
- .addImm(0)
+
+ // TODO: Allow this:
+ if (!MI.getOperand(0).isReg() ||
+ !TRI->isVGPR(MBB.getParent()->getRegInfo(),
+ MI.getOperand(0).getReg()))
+ llvm_unreachable("SI_KILL operand should be a VGPR");
+
+ BuildMI(MBB, &MI, DL, TII->get(Opcode))
+ .add(MI.getOperand(1))
+ .add(MI.getOperand(0));
+ break;
+ }
+ case AMDGPU::SI_KILL_I1_TERMINATOR: {
+ const MachineOperand &Op = MI.getOperand(0);
+ int64_t KillVal = MI.getOperand(1).getImm();
+ assert(KillVal == 0 || KillVal == -1);
+
+ // Kill all threads if Op0 is an immediate and equal to the Kill value.
+ if (Op.isImm()) {
+ int64_t Imm = Op.getImm();
+ assert(Imm == 0 || Imm == -1);
+
+ if (Imm == KillVal)
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
+ .addImm(0);
+ break;
+ }
+
+ unsigned Opcode = KillVal ? AMDGPU::S_ANDN2_B64 : AMDGPU::S_AND_B64;
+ BuildMI(MBB, &MI, DL, TII->get(Opcode), AMDGPU::EXEC)
+ .addReg(AMDGPU::EXEC)
.add(Op);
+ break;
+ }
+ default:
+ llvm_unreachable("invalid opcode, expected SI_KILL_*_TERMINATOR");
}
}
@@ -311,7 +387,8 @@ bool SIInsertSkips::runOnMachineFunction
}
break;
- case AMDGPU::SI_KILL_TERMINATOR:
+ case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
+ case AMDGPU::SI_KILL_I1_TERMINATOR:
MadeChange = true;
kill(MI);
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp?rev=316427&r1=316426&r2=316427&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp Tue Oct 24 03:27:13 2017
@@ -4591,3 +4591,24 @@ SIInstrInfo::getAddNoCarry(MachineBasicB
return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_I32_e64), DestReg)
.addReg(UnusedCarry, RegState::Define | RegState::Dead);
}
+
+bool SIInstrInfo::isKillTerminator(unsigned Opcode) {
+ switch (Opcode) {
+ case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
+ case AMDGPU::SI_KILL_I1_TERMINATOR:
+ return true;
+ default:
+ return false;
+ }
+}
+
+const MCInstrDesc &SIInstrInfo::getKillTerminatorFromPseudo(unsigned Opcode) const {
+ switch (Opcode) {
+ case AMDGPU::SI_KILL_F32_COND_IMM_PSEUDO:
+ return get(AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR);
+ case AMDGPU::SI_KILL_I1_PSEUDO:
+ return get(AMDGPU::SI_KILL_I1_TERMINATOR);
+ default:
+ llvm_unreachable("invalid opcode, expected SI_KILL_*_PSEUDO");
+ }
+}
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h?rev=316427&r1=316426&r2=316427&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h Tue Oct 24 03:27:13 2017
@@ -857,6 +857,9 @@ public:
MachineBasicBlock::iterator I,
const DebugLoc &DL,
unsigned DestReg) const;
+
+ static bool isKillTerminator(unsigned Opcode);
+ const MCInstrDesc &getKillTerminatorFromPseudo(unsigned Opcode) const;
};
namespace AMDGPU {
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td?rev=316427&r1=316426&r2=316427&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td Tue Oct 24 03:27:13 2017
@@ -297,6 +297,10 @@ def as_i64imm: SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i64);
}]>;
+def cond_as_i32imm: SDNodeXForm<cond, [{
+ return CurDAG->getTargetConstant(N->get(), SDLoc(N), MVT::i32);
+}]>;
+
// Copied from the AArch64 backend:
def bitcast_fpimm_to_i32 : SDNodeXForm<fpimm, [{
return CurDAG->getTargetConstant(
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstructions.td?rev=316427&r1=316426&r2=316427&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstructions.td Tue Oct 24 03:27:13 2017
@@ -275,18 +275,21 @@ def SI_ELSE_BREAK : CFPseudoInstSI <
}
let Uses = [EXEC], Defs = [EXEC,VCC] in {
-def SI_KILL : PseudoInstSI <
- (outs), (ins VSrc_b32:$src),
- [(AMDGPUkill i32:$src)]> {
- let isConvergent = 1;
- let usesCustomInserter = 1;
-}
-def SI_KILL_TERMINATOR : SPseudoInstSI <
- (outs), (ins VSrc_b32:$src)> {
- let isTerminator = 1;
+multiclass PseudoInstKill <dag ins> {
+ def _PSEUDO : PseudoInstSI <(outs), ins> {
+ let isConvergent = 1;
+ let usesCustomInserter = 1;
+ }
+
+ def _TERMINATOR : SPseudoInstSI <(outs), ins> {
+ let isTerminator = 1;
+ }
}
+defm SI_KILL_I1 : PseudoInstKill <(ins SSrc_b64:$src, i1imm:$killvalue)>;
+defm SI_KILL_F32_COND_IMM : PseudoInstKill <(ins VSrc_b32:$src0, i32imm:$src1, i32imm:$cond)>;
+
def SI_ILLEGAL_COPY : SPseudoInstSI <
(outs unknown:$dst), (ins unknown:$src),
[], " ; illegal copy $src to $dst">;
@@ -546,8 +549,35 @@ def : GCNPat<
def : GCNPat <
(int_AMDGPU_kilp),
- (SI_KILL (i32 0xbf800000))
+ (SI_KILL_I1_PSEUDO (i1 0), 0)
+>;
+
+def : Pat <
+ // -1.0 as i32 (LowerINTRINSIC_VOID converts all other constants to -1.0)
+ (AMDGPUkill (i32 -1082130432)),
+ (SI_KILL_I1_PSEUDO (i1 0), 0)
+>;
+
+def : Pat <
+ (int_amdgcn_kill i1:$src),
+ (SI_KILL_I1_PSEUDO $src, 0)
+>;
+
+def : Pat <
+ (int_amdgcn_kill (i1 (not i1:$src))),
+ (SI_KILL_I1_PSEUDO $src, -1)
+>;
+
+def : Pat <
+ (AMDGPUkill i32:$src),
+ (SI_KILL_F32_COND_IMM_PSEUDO $src, 0, 3) // 3 means SETOGE
+>;
+
+def : Pat <
+ (int_amdgcn_kill (i1 (setcc f32:$src, InlineFPImm<f32>:$imm, cond:$cond))),
+ (SI_KILL_F32_COND_IMM_PSEUDO $src, (bitcast_fpimm_to_i32 $imm), (cond_as_i32imm $cond))
>;
+// TODO: we could add more variants for other types of conditionals
//===----------------------------------------------------------------------===//
// VOP1 Patterns
Modified: llvm/trunk/lib/Target/AMDGPU/SILowerControlFlow.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SILowerControlFlow.cpp?rev=316427&r1=316426&r2=316427&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SILowerControlFlow.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SILowerControlFlow.cpp Tue Oct 24 03:27:13 2017
@@ -134,7 +134,8 @@ static void setImpSCCDefDead(MachineInst
char &llvm::SILowerControlFlowID = SILowerControlFlow::ID;
-static bool isSimpleIf(const MachineInstr &MI, const MachineRegisterInfo *MRI) {
+static bool isSimpleIf(const MachineInstr &MI, const MachineRegisterInfo *MRI,
+ const SIInstrInfo *TII) {
unsigned SaveExecReg = MI.getOperand(0).getReg();
auto U = MRI->use_instr_nodbg_begin(SaveExecReg);
@@ -143,7 +144,7 @@ static bool isSimpleIf(const MachineInst
U->getOpcode() != AMDGPU::SI_END_CF)
return false;
- // Check for SI_KILL_TERMINATOR on path from if to endif.
+ // Check for SI_KILL_*_TERMINATOR on path from if to endif.
// if there is any such terminator simplifications are not safe.
auto SMBB = MI.getParent();
auto EMBB = U->getParent();
@@ -157,7 +158,7 @@ static bool isSimpleIf(const MachineInst
if (MBB == EMBB || !Visited.insert(MBB).second)
continue;
for(auto &Term : MBB->terminators())
- if (Term.getOpcode() == AMDGPU::SI_KILL_TERMINATOR)
+ if (TII->isKillTerminator(Term.getOpcode()))
return false;
Worklist.append(MBB->succ_begin(), MBB->succ_end());
@@ -184,7 +185,7 @@ void SILowerControlFlow::emitIf(MachineI
// If there is only one use of save exec register and that use is SI_END_CF,
// we can optimize SI_IF by returning the full saved exec mask instead of
// just cleared bits.
- bool SimpleIf = isSimpleIf(MI, MRI);
+ bool SimpleIf = isSimpleIf(MI, MRI, TII);
// Add an implicit def of exec to discourage scheduling VALU after this which
// will interfere with trying to form s_and_saveexec_b64 later.
Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp?rev=316427&r1=316426&r2=316427&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp Tue Oct 24 03:27:13 2017
@@ -3539,6 +3539,14 @@ Instruction *InstCombiner::visitCallInst
return replaceInstUsesWith(*II, II->getArgOperand(0));
}
+ case Intrinsic::amdgcn_kill: {
+ const ConstantInt *C = dyn_cast<ConstantInt>(II->getArgOperand(0));
+ if (!C || !C->getZExtValue())
+ break;
+
+ // amdgcn.kill(i1 1) is a no-op
+ return eraseInstFromFunction(CI);
+ }
case Intrinsic::stackrestore: {
// If the save is right next to the restore, remove the restore. This can
// happen when variable allocas are DCE'd.
Modified: llvm/trunk/test/CodeGen/AMDGPU/insert-skips-kill-uncond.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/insert-skips-kill-uncond.mir?rev=316427&r1=316426&r2=316427&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/insert-skips-kill-uncond.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/insert-skips-kill-uncond.mir Tue Oct 24 03:27:13 2017
@@ -33,7 +33,7 @@ body: |
bb.1:
successors: %bb.2
%vgpr0 = V_MOV_B32_e32 0, implicit %exec
- SI_KILL_TERMINATOR %vgpr0, implicit-def %exec, implicit-def %vcc, implicit %exec
+ SI_KILL_F32_COND_IMM_TERMINATOR %vgpr0, 0, 3, implicit-def %exec, implicit-def %vcc, implicit %exec
S_BRANCH %bb.2
bb.2:
Added: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll?rev=316427&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll Tue Oct 24 03:27:13 2017
@@ -0,0 +1,241 @@
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI %s
+
+; SI-LABEL: {{^}}gs_const:
+; SI-NOT: v_cmpx
+; SI: s_mov_b64 exec, 0
+define amdgpu_gs void @gs_const() {
+ %tmp = icmp ule i32 0, 3
+ %tmp1 = select i1 %tmp, float 1.000000e+00, float -1.000000e+00
+ %c1 = fcmp oge float %tmp1, 0.0
+ call void @llvm.amdgcn.kill(i1 %c1)
+ %tmp2 = icmp ule i32 3, 0
+ %tmp3 = select i1 %tmp2, float 1.000000e+00, float -1.000000e+00
+ %c2 = fcmp oge float %tmp3, 0.0
+ call void @llvm.amdgcn.kill(i1 %c2)
+ ret void
+}
+
+; SI-LABEL: {{^}}vcc_implicit_def:
+; SI-NOT: v_cmp_gt_f32_e32 vcc,
+; SI: v_cmp_gt_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], 0, v{{[0-9]+}}
+; SI: v_cmpx_le_f32_e32 vcc, 0, v{{[0-9]+}}
+; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1.0, [[CMP]]
+define amdgpu_ps void @vcc_implicit_def(float %arg13, float %arg14) {
+ %tmp0 = fcmp olt float %arg13, 0.000000e+00
+ %c1 = fcmp oge float %arg14, 0.0
+ call void @llvm.amdgcn.kill(i1 %c1)
+ %tmp1 = select i1 %tmp0, float 1.000000e+00, float 0.000000e+00
+ call void @llvm.amdgcn.exp.f32(i32 1, i32 15, float %tmp1, float %tmp1, float %tmp1, float %tmp1, i1 true, i1 true) #0
+ ret void
+}
+
+; SI-LABEL: {{^}}true:
+; SI-NEXT: BB#
+; SI-NEXT: BB#
+; SI-NEXT: s_endpgm
+define amdgpu_gs void @true() {
+ call void @llvm.amdgcn.kill(i1 true)
+ ret void
+}
+
+; SI-LABEL: {{^}}false:
+; SI-NOT: v_cmpx
+; SI: s_mov_b64 exec, 0
+define amdgpu_gs void @false() {
+ call void @llvm.amdgcn.kill(i1 false)
+ ret void
+}
+
+; SI-LABEL: {{^}}and:
+; SI: v_cmp_lt_i32
+; SI: v_cmp_lt_i32
+; SI: s_or_b64 s[0:1]
+; SI: s_and_b64 exec, exec, s[0:1]
+define amdgpu_gs void @and(i32 %a, i32 %b, i32 %c, i32 %d) {
+ %c1 = icmp slt i32 %a, %b
+ %c2 = icmp slt i32 %c, %d
+ %x = or i1 %c1, %c2
+ call void @llvm.amdgcn.kill(i1 %x)
+ ret void
+}
+
+; SI-LABEL: {{^}}andn2:
+; SI: v_cmp_lt_i32
+; SI: v_cmp_lt_i32
+; SI: s_xor_b64 s[0:1]
+; SI: s_andn2_b64 exec, exec, s[0:1]
+define amdgpu_gs void @andn2(i32 %a, i32 %b, i32 %c, i32 %d) {
+ %c1 = icmp slt i32 %a, %b
+ %c2 = icmp slt i32 %c, %d
+ %x = xor i1 %c1, %c2
+ %y = xor i1 %x, 1
+ call void @llvm.amdgcn.kill(i1 %y)
+ ret void
+}
+
+; SI-LABEL: {{^}}oeq:
+; SI: v_cmpx_eq_f32
+; SI-NOT: s_and
+define amdgpu_gs void @oeq(float %a) {
+ %c1 = fcmp oeq float %a, 0.0
+ call void @llvm.amdgcn.kill(i1 %c1)
+ ret void
+}
+
+; SI-LABEL: {{^}}ogt:
+; SI: v_cmpx_lt_f32
+; SI-NOT: s_and
+define amdgpu_gs void @ogt(float %a) {
+ %c1 = fcmp ogt float %a, 0.0
+ call void @llvm.amdgcn.kill(i1 %c1)
+ ret void
+}
+
+; SI-LABEL: {{^}}oge:
+; SI: v_cmpx_le_f32
+; SI-NOT: s_and
+define amdgpu_gs void @oge(float %a) {
+ %c1 = fcmp oge float %a, 0.0
+ call void @llvm.amdgcn.kill(i1 %c1)
+ ret void
+}
+
+; SI-LABEL: {{^}}olt:
+; SI: v_cmpx_gt_f32
+; SI-NOT: s_and
+define amdgpu_gs void @olt(float %a) {
+ %c1 = fcmp olt float %a, 0.0
+ call void @llvm.amdgcn.kill(i1 %c1)
+ ret void
+}
+
+; SI-LABEL: {{^}}ole:
+; SI: v_cmpx_ge_f32
+; SI-NOT: s_and
+define amdgpu_gs void @ole(float %a) {
+ %c1 = fcmp ole float %a, 0.0
+ call void @llvm.amdgcn.kill(i1 %c1)
+ ret void
+}
+
+; SI-LABEL: {{^}}one:
+; SI: v_cmpx_lg_f32
+; SI-NOT: s_and
+define amdgpu_gs void @one(float %a) {
+ %c1 = fcmp one float %a, 0.0
+ call void @llvm.amdgcn.kill(i1 %c1)
+ ret void
+}
+
+; SI-LABEL: {{^}}ord:
+; FIXME: This is absolutely unimportant, but we could use the cmpx variant here.
+; SI: v_cmp_o_f32
+define amdgpu_gs void @ord(float %a) {
+ %c1 = fcmp ord float %a, 0.0
+ call void @llvm.amdgcn.kill(i1 %c1)
+ ret void
+}
+
+; SI-LABEL: {{^}}uno:
+; FIXME: This is absolutely unimportant, but we could use the cmpx variant here.
+; SI: v_cmp_u_f32
+define amdgpu_gs void @uno(float %a) {
+ %c1 = fcmp uno float %a, 0.0
+ call void @llvm.amdgcn.kill(i1 %c1)
+ ret void
+}
+
+; SI-LABEL: {{^}}ueq:
+; SI: v_cmpx_nlg_f32
+; SI-NOT: s_and
+define amdgpu_gs void @ueq(float %a) {
+ %c1 = fcmp ueq float %a, 0.0
+ call void @llvm.amdgcn.kill(i1 %c1)
+ ret void
+}
+
+; SI-LABEL: {{^}}ugt:
+; SI: v_cmpx_nge_f32
+; SI-NOT: s_and
+define amdgpu_gs void @ugt(float %a) {
+ %c1 = fcmp ugt float %a, 0.0
+ call void @llvm.amdgcn.kill(i1 %c1)
+ ret void
+}
+
+; SI-LABEL: {{^}}uge:
+; SI: v_cmpx_ngt_f32_e32 vcc, -1.0
+; SI-NOT: s_and
+define amdgpu_gs void @uge(float %a) {
+ %c1 = fcmp uge float %a, -1.0
+ call void @llvm.amdgcn.kill(i1 %c1)
+ ret void
+}
+
+; SI-LABEL: {{^}}ult:
+; SI: v_cmpx_nle_f32_e32 vcc, -2.0
+; SI-NOT: s_and
+define amdgpu_gs void @ult(float %a) {
+ %c1 = fcmp ult float %a, -2.0
+ call void @llvm.amdgcn.kill(i1 %c1)
+ ret void
+}
+
+; SI-LABEL: {{^}}ule:
+; SI: v_cmpx_nlt_f32_e32 vcc, 2.0
+; SI-NOT: s_and
+define amdgpu_gs void @ule(float %a) {
+ %c1 = fcmp ule float %a, 2.0
+ call void @llvm.amdgcn.kill(i1 %c1)
+ ret void
+}
+
+; SI-LABEL: {{^}}une:
+; SI: v_cmpx_neq_f32_e32 vcc, 0
+; SI-NOT: s_and
+define amdgpu_gs void @une(float %a) {
+ %c1 = fcmp une float %a, 0.0
+ call void @llvm.amdgcn.kill(i1 %c1)
+ ret void
+}
+
+; SI-LABEL: {{^}}neg_olt:
+; SI: v_cmpx_ngt_f32_e32 vcc, 1.0
+; SI-NOT: s_and
+define amdgpu_gs void @neg_olt(float %a) {
+ %c1 = fcmp olt float %a, 1.0
+ %c2 = xor i1 %c1, 1
+ call void @llvm.amdgcn.kill(i1 %c2)
+ ret void
+}
+
+; SI-LABEL: {{^}}fcmp_x2:
+; FIXME: LLVM should be able to combine these fcmp opcodes.
+; SI: v_cmp_gt_f32
+; SI: v_cndmask_b32
+; SI: v_cmpx_le_f32
+define amdgpu_ps void @fcmp_x2(float %a) #0 {
+ %ogt = fcmp nsz ogt float %a, 2.500000e-01
+ %k = select i1 %ogt, float -1.000000e+00, float 0.000000e+00
+ %c = fcmp nsz oge float %k, 0.000000e+00
+ call void @llvm.amdgcn.kill(i1 %c) #1
+ ret void
+}
+
+; SI-LABEL: {{^}}wqm:
+; SI: v_cmp_neq_f32_e32 vcc, 0
+; SI: s_wqm_b64 s[0:1], vcc
+; SI: s_and_b64 exec, exec, s[0:1]
+define amdgpu_ps void @wqm(float %a) {
+ %c1 = fcmp une float %a, 0.0
+ %c2 = call i1 @llvm.amdgcn.wqm.vote(i1 %c1)
+ call void @llvm.amdgcn.kill(i1 %c2)
+ ret void
+}
+
+declare void @llvm.amdgcn.kill(i1) #0
+declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
+declare i1 @llvm.amdgcn.wqm.vote(i1)
+
+attributes #0 = { nounwind }
Modified: llvm/trunk/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll?rev=316427&r1=316426&r2=316427&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll Tue Oct 24 03:27:13 2017
@@ -1570,4 +1570,19 @@ main_body:
ret float %r
}
+; --------------------------------------------------------------------
+; llvm.amdgcn.kill
+; --------------------------------------------------------------------
+
+declare void @llvm.amdgcn.kill(i1)
+
+; CHECK-LABEL: @kill_true() {
+; CHECK-NEXT: ret void
+; CHECK-NEXT: }
+define void @kill_true() {
+ call void @llvm.amdgcn.kill(i1 true)
+ ret void
+}
+
+
; CHECK: attributes #5 = { convergent }
More information about the llvm-commits
mailing list