[llvm] r281780 - AMDGPU: Use SOPK compare instructions
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 16 14:41:16 PDT 2016
Author: arsenm
Date: Fri Sep 16 16:41:16 2016
New Revision: 281780
URL: http://llvm.org/viewvc/llvm-project?rev=281780&view=rev
Log:
AMDGPU: Use SOPK compare instructions
Added:
llvm/trunk/test/CodeGen/AMDGPU/sopk-compares.ll
Modified:
llvm/trunk/lib/Target/AMDGPU/SIDefines.h
llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td
llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h
llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
llvm/trunk/lib/Target/AMDGPU/SIShrinkInstructions.cpp
llvm/trunk/lib/Target/AMDGPU/SOPInstructions.td
llvm/trunk/test/CodeGen/AMDGPU/si-annotate-cf.ll
Modified: llvm/trunk/lib/Target/AMDGPU/SIDefines.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIDefines.h?rev=281780&r1=281779&r2=281780&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIDefines.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIDefines.h Fri Sep 16 16:41:16 2016
@@ -43,7 +43,8 @@ enum {
SGPRSpill = 1 << 24,
VOPAsmPrefer32Bit = 1 << 25,
Gather4 = 1 << 26,
- DisableWQM = 1 << 27
+ DisableWQM = 1 << 27,
+ SOPK_ZEXT = 1 << 28
};
}
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td?rev=281780&r1=281779&r2=281780&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td Fri Sep 16 16:41:16 2016
@@ -56,6 +56,10 @@ class InstSI <dag outs, dag ins, string
// Whether WQM _must_ be disabled for this instruction.
field bits<1> DisableWQM = 0;
+ // Most SOPK instructions treat the immediate as a signed 16-bit value;
+ // however, some use it as unsigned.
+ field bits<1> SOPKZext = 0;
+
// These need to be kept in sync with the enum in SIInstrFlags.
let TSFlags{0} = VM_CNT;
let TSFlags{1} = EXP_CNT;
@@ -89,6 +93,7 @@ class InstSI <dag outs, dag ins, string
let TSFlags{25} = VOPAsmPrefer32Bit;
let TSFlags{26} = Gather4;
let TSFlags{27} = DisableWQM;
+ let TSFlags{28} = SOPKZext;
let SchedRW = [Write32Bit];
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp?rev=281780&r1=281779&r2=281780&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp Fri Sep 16 16:41:16 2016
@@ -1812,6 +1812,21 @@ bool SIInstrInfo::verifyInstruction(cons
}
}
+ if (isSOPK(MI)) {
+ int64_t Imm = getNamedOperand(MI, AMDGPU::OpName::simm16)->getImm();
+ if (sopkIsZext(MI)) {
+ if (!isUInt<16>(Imm)) {
+ ErrInfo = "invalid immediate for SOPK instruction";
+ return false;
+ }
+ } else {
+ if (!isInt<16>(Imm)) {
+ ErrInfo = "invalid immediate for SOPK instruction";
+ return false;
+ }
+ }
+ }
+
if (Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e32 ||
Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e64 ||
Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h?rev=281780&r1=281779&r2=281780&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h Fri Sep 16 16:41:16 2016
@@ -391,6 +391,14 @@ public:
return MI.getDesc().TSFlags & SIInstrFlags::VM_CNT;
}
+ static bool sopkIsZext(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::SOPK_ZEXT;
+ }
+
+ bool sopkIsZext(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::SOPK_ZEXT;
+ }
+
bool isVGPRCopy(const MachineInstr &MI) const {
assert(MI.isCopy());
unsigned Dest = MI.getOperand(0).getReg();
@@ -603,6 +611,9 @@ namespace AMDGPU {
LLVM_READONLY
int getAtomicNoRetOp(uint16_t Opcode);
+ LLVM_READONLY
+ int getSOPKOp(uint16_t Opcode);
+
const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;
const uint64_t RSRC_ELEMENT_SIZE_SHIFT = (32 + 19);
const uint64_t RSRC_INDEX_STRIDE_SHIFT = (32 + 21);
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td?rev=281780&r1=281779&r2=281780&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td Fri Sep 16 16:41:16 2016
@@ -1193,7 +1193,7 @@ class VOP <string opName> {
string OpName = opName;
}
-class VOP2_REV <string revOp, bit isOrig> {
+class Commutable_REV <string revOp, bit isOrig> {
string RevOp = revOp;
bit IsOrig = isOrig;
}
@@ -1325,7 +1325,7 @@ multiclass VOP2SI_m <vop2 op, string opN
string revOp> {
def "" : VOP2_Pseudo <p.Outs32, p.Ins32, pattern, opName>,
- VOP2_REV<revOp#"_e32", !eq(revOp, opName)>;
+ Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;
def _si : VOP2_Real_si <opName, op, p.Outs32, p.Ins32, p.Asm32>;
}
@@ -1334,7 +1334,7 @@ multiclass VOP2_m <vop2 op, string opNam
string revOp> {
def "" : VOP2_Pseudo <p.Outs32, p.Ins32, pattern, opName>,
- VOP2_REV<revOp#"_e32", !eq(revOp, opName)>;
+ Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;
def _si : VOP2_Real_si <opName, op, p.Outs32, p.Ins32, p.Asm32>;
@@ -1523,7 +1523,7 @@ multiclass VOP3_2_m <vop op, dag outs, d
bit HasMods = 1> {
def "" : VOP3_Pseudo <outs, ins, pattern, opName, HasMods>,
- VOP2_REV<revOp#"_e64", !eq(revOp, opName)>;
+ Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;
def _si : VOP3_Real_si <op.SI3, outs, ins, asm, opName, HasMods>,
VOP3DisableFields<1, 0, HasMods>;
@@ -1537,7 +1537,7 @@ multiclass VOP3SI_2_m <vop op, dag outs,
bit HasMods = 1> {
def "" : VOP3_Pseudo <outs, ins, pattern, opName, HasMods>,
- VOP2_REV<revOp#"_e64", !eq(revOp, opName)>;
+ Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;
def _si : VOP3_Real_si <op.SI3, outs, ins, asm, opName, HasMods>,
VOP3DisableFields<1, 0, HasMods>;
@@ -1578,7 +1578,7 @@ multiclass VOP3_C_m <vop op, dag outs, d
string revOp, list<SchedReadWrite> sched> {
def "" : VOP3_Pseudo <outs, ins, pattern, opName, HasMods>,
- VOP2_REV<revOp#"_e64", !eq(revOp, opName)> {
+ Commutable_REV<revOp#"_e64", !eq(revOp, opName)> {
let Defs = !if(defExec, [EXEC], []);
let SchedRW = sched;
}
@@ -1829,7 +1829,7 @@ multiclass VOPC_m <vopc op, dag ins, str
string revOpName = "", string asm = opName#"_e32 "#op_asm,
string alias_asm = opName#" "#op_asm> {
def "" : VOPC_Pseudo <ins, pattern, opName>,
- VOP2_REV<revOpName#"_e32", !eq(revOpName, opName)> {
+ Commutable_REV<revOpName#"_e32", !eq(revOpName, opName)> {
let Defs = !if(DefExec, [VCC, EXEC], [VCC]);
let SchedRW = sched;
let isConvergent = DefExec;
@@ -2106,7 +2106,7 @@ def getMaskedMIMGOp : InstrMapping {
// Maps a commuted opcode to its original version
def getCommuteOrig : InstrMapping {
- let FilterClass = "VOP2_REV";
+ let FilterClass = "Commutable_REV";
let RowFields = ["RevOp"];
let ColFields = ["IsOrig"];
let KeyCol = ["0"];
@@ -2115,31 +2115,13 @@ def getCommuteOrig : InstrMapping {
// Maps an original opcode to its commuted version
def getCommuteRev : InstrMapping {
- let FilterClass = "VOP2_REV";
+ let FilterClass = "Commutable_REV";
let RowFields = ["RevOp"];
let ColFields = ["IsOrig"];
let KeyCol = ["1"];
let ValueCols = [["0"]];
}
-def getCommuteCmpOrig : InstrMapping {
- let FilterClass = "VOP2_REV";
- let RowFields = ["RevOp"];
- let ColFields = ["IsOrig"];
- let KeyCol = ["0"];
- let ValueCols = [["1"]];
-}
-
-// Maps an original opcode to its commuted version
-def getCommuteCmpRev : InstrMapping {
- let FilterClass = "VOP2_REV";
- let RowFields = ["RevOp"];
- let ColFields = ["IsOrig"];
- let KeyCol = ["1"];
- let ValueCols = [["0"]];
-}
-
-
def getMCOpcodeGen : InstrMapping {
let FilterClass = "SIMCInstr";
let RowFields = ["PseudoInstr"];
@@ -2149,6 +2131,15 @@ def getMCOpcodeGen : InstrMapping {
[!cast<string>(SIEncodingFamily.VI)]];
}
+// Get equivalent SOPK instruction.
+def getSOPKOp : InstrMapping {
+ let FilterClass = "SOPKInstTable";
+ let RowFields = ["BaseCmpOp"];
+ let ColFields = ["IsSOPK"];
+ let KeyCol = ["0"];
+ let ValueCols = [["1"]];
+}
+
def getAddr64Inst : InstrMapping {
let FilterClass = "MUBUFAddr64Table";
let RowFields = ["OpName"];
Modified: llvm/trunk/lib/Target/AMDGPU/SIShrinkInstructions.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIShrinkInstructions.cpp?rev=281780&r1=281779&r2=281780&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIShrinkInstructions.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIShrinkInstructions.cpp Fri Sep 16 16:41:16 2016
@@ -188,6 +188,26 @@ static bool isKImmOperand(const SIInstrI
return isInt<16>(Src.getImm()) && !TII->isInlineConstant(Src, 4);
}
+static bool isKUImmOperand(const SIInstrInfo *TII, const MachineOperand &Src) {
+ return isUInt<16>(Src.getImm()) && !TII->isInlineConstant(Src, 4);
+}
+
+static bool isKImmOrKUImmOperand(const SIInstrInfo *TII,
+ const MachineOperand &Src,
+ bool &IsUnsigned) {
+ if (isInt<16>(Src.getImm())) {
+ IsUnsigned = false;
+ return !TII->isInlineConstant(Src, 4);
+ }
+
+ if (isUInt<16>(Src.getImm())) {
+ IsUnsigned = true;
+ return !TII->isInlineConstant(Src, 4);
+ }
+
+ return false;
+}
+
/// Copy implicit register operands from specified instruction to this
/// instruction that are not part of the instruction definition.
static void copyExtraImplicitOps(MachineInstr &NewMI, MachineFunction &MF,
@@ -202,6 +222,44 @@ static void copyExtraImplicitOps(Machine
}
}
+static void shrinkScalarCompare(const SIInstrInfo *TII, MachineInstr &MI) {
+ // cmpk instructions do scc = dst <cc op> imm16, so commute the instruction to
+ // get constants on the RHS.
+ if (!MI.getOperand(0).isReg())
+ TII->commuteInstruction(MI, false, 0, 1);
+
+ const MachineOperand &Src1 = MI.getOperand(1);
+ if (!Src1.isImm())
+ return;
+
+ int SOPKOpc = AMDGPU::getSOPKOp(MI.getOpcode());
+ if (SOPKOpc == -1)
+ return;
+
+ // eq/ne is special because the imm16 can be treated as signed or unsigned,
+ // and initially selected to the signed versions.
+ if (SOPKOpc == AMDGPU::S_CMPK_EQ_I32 || SOPKOpc == AMDGPU::S_CMPK_LG_I32) {
+ bool HasUImm;
+ if (isKImmOrKUImmOperand(TII, Src1, HasUImm)) {
+ if (HasUImm) {
+ SOPKOpc = (SOPKOpc == AMDGPU::S_CMPK_EQ_I32) ?
+ AMDGPU::S_CMPK_EQ_U32 : AMDGPU::S_CMPK_LG_U32;
+ }
+
+ MI.setDesc(TII->get(SOPKOpc));
+ }
+
+ return;
+ }
+
+ const MCInstrDesc &NewDesc = TII->get(SOPKOpc);
+
+ if ((TII->sopkIsZext(SOPKOpc) && isKUImmOperand(TII, Src1)) ||
+ (!TII->sopkIsZext(SOPKOpc) && isKImmOperand(TII, Src1))) {
+ MI.setDesc(NewDesc);
+ }
+}
+
bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(*MF.getFunction()))
return false;
@@ -310,6 +368,12 @@ bool SIShrinkInstructions::runOnMachineF
}
}
+ // Try to use s_cmpk_*
+ if (MI.isCompare() && TII->isSOPC(MI)) {
+ shrinkScalarCompare(TII, MI);
+ continue;
+ }
+
// Try to use S_MOVK_I32, which will save 4 bytes for small immediates.
if (MI.getOpcode() == AMDGPU::S_MOV_B32) {
const MachineOperand &Src = MI.getOperand(1);
Modified: llvm/trunk/lib/Target/AMDGPU/SOPInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SOPInstructions.td?rev=281780&r1=281779&r2=281780&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SOPInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SOPInstructions.td Fri Sep 16 16:41:16 2016
@@ -473,6 +473,11 @@ class SOPK_Real64<bits<5> op, SOPK_Pseud
let Inst{63-32} = imm;
}
+class SOPKInstTable <bit is_sopk, string cmpOp = ""> {
+ bit IsSOPK = is_sopk;
+ string BaseCmpOp = cmpOp;
+}
+
class SOPK_32 <string opName, list<dag> pattern=[]> : SOPK_Pseudo <
opName,
(outs SReg_32:$sdst),
@@ -480,12 +485,12 @@ class SOPK_32 <string opName, list<dag>
"$sdst, $simm16",
pattern>;
-class SOPK_SCC <string opName, list<dag> pattern=[]> : SOPK_Pseudo <
+class SOPK_SCC <string opName, string base_op = ""> : SOPK_Pseudo <
opName,
(outs),
(ins SReg_32:$sdst, u16imm:$simm16),
- "$sdst, $simm16",
- pattern> {
+ "$sdst, $simm16", []>,
+ SOPKInstTable<1, base_op>{
let Defs = [SCC];
}
@@ -521,18 +526,21 @@ let isCompare = 1 in {
// [(set i1:$dst, (setcc i32:$src0, imm:$src1, SETEQ))]
// >;
-def S_CMPK_EQ_I32 : SOPK_SCC <"s_cmpk_eq_i32">;
-def S_CMPK_LG_I32 : SOPK_SCC <"s_cmpk_lg_i32">;
-def S_CMPK_GT_I32 : SOPK_SCC <"s_cmpk_gt_i32">;
-def S_CMPK_GE_I32 : SOPK_SCC <"s_cmpk_ge_i32">;
-def S_CMPK_LT_I32 : SOPK_SCC <"s_cmpk_lt_i32">;
-def S_CMPK_LE_I32 : SOPK_SCC <"s_cmpk_le_i32">;
-def S_CMPK_EQ_U32 : SOPK_SCC <"s_cmpk_eq_u32">;
-def S_CMPK_LG_U32 : SOPK_SCC <"s_cmpk_lg_u32">;
-def S_CMPK_GT_U32 : SOPK_SCC <"s_cmpk_gt_u32">;
-def S_CMPK_GE_U32 : SOPK_SCC <"s_cmpk_ge_u32">;
-def S_CMPK_LT_U32 : SOPK_SCC <"s_cmpk_lt_u32">;
-def S_CMPK_LE_U32 : SOPK_SCC <"s_cmpk_le_u32">;
+def S_CMPK_EQ_I32 : SOPK_SCC <"s_cmpk_eq_i32", "s_cmp_eq_i32">;
+def S_CMPK_LG_I32 : SOPK_SCC <"s_cmpk_lg_i32", "s_cmp_lg_i32">;
+def S_CMPK_GT_I32 : SOPK_SCC <"s_cmpk_gt_i32", "s_cmp_gt_i32">;
+def S_CMPK_GE_I32 : SOPK_SCC <"s_cmpk_ge_i32", "s_cmp_ge_i32">;
+def S_CMPK_LT_I32 : SOPK_SCC <"s_cmpk_lt_i32", "s_cmp_lt_i32">;
+def S_CMPK_LE_I32 : SOPK_SCC <"s_cmpk_le_i32", "s_cmp_le_i32">;
+
+let SOPKZext = 1 in {
+def S_CMPK_EQ_U32 : SOPK_SCC <"s_cmpk_eq_u32", "s_cmp_eq_u32">;
+def S_CMPK_LG_U32 : SOPK_SCC <"s_cmpk_lg_u32", "s_cmp_lg_u32">;
+def S_CMPK_GT_U32 : SOPK_SCC <"s_cmpk_gt_u32", "s_cmp_gt_u32">;
+def S_CMPK_GE_U32 : SOPK_SCC <"s_cmpk_ge_u32", "s_cmp_ge_u32">;
+def S_CMPK_LT_U32 : SOPK_SCC <"s_cmpk_lt_u32", "s_cmp_lt_u32">;
+def S_CMPK_LE_U32 : SOPK_SCC <"s_cmpk_le_u32", "s_cmp_le_u32">;
+} // End SOPKZext = 1
} // End isCompare = 1
let Defs = [SCC], isCommutable = 1, DisableEncoding = "$src0",
@@ -613,8 +621,14 @@ class SOPC_Helper <bits<7> op, RegisterO
[(set SCC, (si_setcc_uniform vt:$src0, vt:$src1, cond))] > {
}
-class SOPC_CMP_32<bits<7> op, string opName, PatLeaf cond = COND_NULL>
- : SOPC_Helper<op, SSrc_b32, i32, opName, cond>;
+class SOPC_CMP_32<bits<7> op, string opName,
+ PatLeaf cond = COND_NULL, string revOp = opName>
+ : SOPC_Helper<op, SSrc_b32, i32, opName, cond>,
+ Commutable_REV<revOp, !eq(revOp, opName)>,
+ SOPKInstTable<0, opName> {
+ let isCompare = 1;
+ let isCommutable = 1;
+}
class SOPC_32<bits<7> op, string opName, list<dag> pattern = []>
: SOPC_Base<op, SSrc_b32, SSrc_b32, opName, pattern>;
@@ -622,19 +636,19 @@ class SOPC_32<bits<7> op, string opName,
class SOPC_64_32<bits<7> op, string opName, list<dag> pattern = []>
: SOPC_Base<op, SSrc_b64, SSrc_b32, opName, pattern>;
-
def S_CMP_EQ_I32 : SOPC_CMP_32 <0x00, "s_cmp_eq_i32", COND_EQ>;
def S_CMP_LG_I32 : SOPC_CMP_32 <0x01, "s_cmp_lg_i32", COND_NE>;
def S_CMP_GT_I32 : SOPC_CMP_32 <0x02, "s_cmp_gt_i32", COND_SGT>;
def S_CMP_GE_I32 : SOPC_CMP_32 <0x03, "s_cmp_ge_i32", COND_SGE>;
-def S_CMP_LT_I32 : SOPC_CMP_32 <0x04, "s_cmp_lt_i32", COND_SLT>;
-def S_CMP_LE_I32 : SOPC_CMP_32 <0x05, "s_cmp_le_i32", COND_SLE>;
+def S_CMP_LT_I32 : SOPC_CMP_32 <0x04, "s_cmp_lt_i32", COND_SLT, "s_cmp_gt_i32">;
+def S_CMP_LE_I32 : SOPC_CMP_32 <0x05, "s_cmp_le_i32", COND_SLE, "s_cmp_ge_i32">;
def S_CMP_EQ_U32 : SOPC_CMP_32 <0x06, "s_cmp_eq_u32", COND_EQ>;
-def S_CMP_LG_U32 : SOPC_CMP_32 <0x07, "s_cmp_lg_u32", COND_NE >;
+def S_CMP_LG_U32 : SOPC_CMP_32 <0x07, "s_cmp_lg_u32", COND_NE>;
def S_CMP_GT_U32 : SOPC_CMP_32 <0x08, "s_cmp_gt_u32", COND_UGT>;
def S_CMP_GE_U32 : SOPC_CMP_32 <0x09, "s_cmp_ge_u32", COND_UGE>;
-def S_CMP_LT_U32 : SOPC_CMP_32 <0x0a, "s_cmp_lt_u32", COND_ULT>;
-def S_CMP_LE_U32 : SOPC_CMP_32 <0x0b, "s_cmp_le_u32", COND_ULE>;
+def S_CMP_LT_U32 : SOPC_CMP_32 <0x0a, "s_cmp_lt_u32", COND_ULT, "s_cmp_gt_u32">;
+def S_CMP_LE_U32 : SOPC_CMP_32 <0x0b, "s_cmp_le_u32", COND_ULE, "s_cmp_ge_u32">;
+
def S_BITCMP0_B32 : SOPC_32 <0x0c, "s_bitcmp0_b32">;
def S_BITCMP1_B32 : SOPC_32 <0x0d, "s_bitcmp1_b32">;
def S_BITCMP0_B64 : SOPC_64_32 <0x0e, "s_bitcmp0_b64">;
Modified: llvm/trunk/test/CodeGen/AMDGPU/si-annotate-cf.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/si-annotate-cf.ll?rev=281780&r1=281779&r2=281780&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/si-annotate-cf.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/si-annotate-cf.ll Fri Sep 16 16:41:16 2016
@@ -92,7 +92,7 @@ declare float @llvm.fabs.f32(float) noun
; SI: s_cmp_gt_i32
; SI-NEXT: s_cbranch_scc0 [[ENDPGM:BB[0-9]+_[0-9]+]]
-; SI: s_cmp_gt_i32
+; SI: s_cmpk_gt_i32
; SI-NEXT: s_cbranch_scc1 [[ENDPGM]]
; SI: [[INFLOOP:BB[0-9]+_[0-9]+]]
Added: llvm/trunk/test/CodeGen/AMDGPU/sopk-compares.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sopk-compares.ll?rev=281780&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sopk-compares.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/sopk-compares.ll Fri Sep 16 16:41:16 2016
@@ -0,0 +1,573 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
+
+; Since this intrinsic is exposed as a constant after isel, use it to
+; defeat the DAG's compare with constant canonicalizations.
+declare i32 @llvm.amdgcn.groupstaticsize() #1
+
+ at lds = addrspace(3) global [512 x i32] undef, align 4
+
+; GCN-LABEL: {{^}}br_scc_eq_i32_inline_imm:
+; GCN: s_cmp_eq_i32 s{{[0-9]+}}, 4{{$}}
+define void @br_scc_eq_i32_inline_imm(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+ %cmp0 = icmp eq i32 %cond, 4
+ br i1 %cmp0, label %endif, label %if
+
+if:
+ call void asm sideeffect "", ""()
+ br label %endif
+
+endif:
+ store volatile i32 1, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_eq_i32_simm16_max:
+; GCN: s_cmpk_eq_i32 s{{[0-9]+}}, 0x7fff{{$}}
+define void @br_scc_eq_i32_simm16_max(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+ %cmp0 = icmp eq i32 %cond, 32767
+ br i1 %cmp0, label %endif, label %if
+
+if:
+ call void asm sideeffect "", ""()
+ br label %endif
+
+endif:
+ store volatile i32 1, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_eq_i32_simm16_max_p1:
+; GCN: s_cmpk_eq_u32 s{{[0-9]+}}, 0x8000{{$}}
+define void @br_scc_eq_i32_simm16_max_p1(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+ %cmp0 = icmp eq i32 %cond, 32768
+ br i1 %cmp0, label %endif, label %if
+
+if:
+ call void asm sideeffect "", ""()
+ br label %endif
+
+endif:
+ store volatile i32 1, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_ne_i32_simm16_max_p1:
+; GCN: s_cmpk_lg_u32 s{{[0-9]+}}, 0x8000{{$}}
+define void @br_scc_ne_i32_simm16_max_p1(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+ %cmp0 = icmp ne i32 %cond, 32768
+ br i1 %cmp0, label %endif, label %if
+
+if:
+ call void asm sideeffect "", ""()
+ br label %endif
+
+endif:
+ store volatile i32 1, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_eq_i32_simm16_min:
+; GCN: s_cmpk_eq_i32 s{{[0-9]+}}, 0x8000{{$}}
+define void @br_scc_eq_i32_simm16_min(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+ %cmp0 = icmp eq i32 %cond, -32768
+ br i1 %cmp0, label %endif, label %if
+
+if:
+ call void asm sideeffect "", ""()
+ br label %endif
+
+endif:
+ store volatile i32 1, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_eq_i32_simm16_min_m1:
+; GCN: s_cmp_eq_i32 s{{[0-9]+}}, 0xffff7fff{{$}}
+define void @br_scc_eq_i32_simm16_min_m1(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+ %cmp0 = icmp eq i32 %cond, -32769
+ br i1 %cmp0, label %endif, label %if
+
+if:
+ call void asm sideeffect "", ""()
+ br label %endif
+
+endif:
+ store volatile i32 1, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_eq_i32_uimm15_max:
+; GCN: s_cmpk_eq_u32 s{{[0-9]+}}, 0xffff{{$}}
+define void @br_scc_eq_i32_uimm15_max(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+ %cmp0 = icmp eq i32 %cond, 65535
+ br i1 %cmp0, label %endif, label %if
+
+if:
+ call void asm sideeffect "", ""()
+ br label %endif
+
+endif:
+ store volatile i32 1, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_eq_i32_uimm16_max:
+; GCN: s_cmpk_eq_u32 s{{[0-9]+}}, 0xffff{{$}}
+define void @br_scc_eq_i32_uimm16_max(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+ %cmp0 = icmp eq i32 %cond, 65535
+ br i1 %cmp0, label %endif, label %if
+
+if:
+ call void asm sideeffect "", ""()
+ br label %endif
+
+endif:
+ store volatile i32 1, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_eq_i32_uimm16_max_p1:
+; GCN: s_cmp_eq_i32 s{{[0-9]+}}, 0x10000{{$}}
+define void @br_scc_eq_i32_uimm16_max_p1(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+ %cmp0 = icmp eq i32 %cond, 65536
+ br i1 %cmp0, label %endif, label %if
+
+if:
+ call void asm sideeffect "", ""()
+ br label %endif
+
+endif:
+ store volatile i32 1, i32 addrspace(1)* %out
+ ret void
+}
+
+
+; GCN-LABEL: {{^}}br_scc_eq_i32:
+; GCN: s_cmpk_eq_i32 s{{[0-9]+}}, 0x41{{$}}
+define void @br_scc_eq_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+ %cmp0 = icmp eq i32 %cond, 65
+ br i1 %cmp0, label %endif, label %if
+
+if:
+ call void asm sideeffect "", ""()
+ br label %endif
+
+endif:
+ store volatile i32 1, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_ne_i32:
+; GCN: s_cmpk_lg_i32 s{{[0-9]+}}, 0x41{{$}}
+define void @br_scc_ne_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+ %cmp0 = icmp ne i32 %cond, 65
+ br i1 %cmp0, label %endif, label %if
+
+if:
+ call void asm sideeffect "", ""()
+ br label %endif
+
+endif:
+ store volatile i32 1, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_sgt_i32:
+; GCN: s_cmpk_gt_i32 s{{[0-9]+}}, 0x41{{$}}
+define void @br_scc_sgt_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+ %cmp0 = icmp sgt i32 %cond, 65
+ br i1 %cmp0, label %endif, label %if
+
+if:
+ call void asm sideeffect "", ""()
+ br label %endif
+
+endif:
+ store volatile i32 1, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_sgt_i32_simm16_max:
+; GCN: s_cmpk_gt_i32 s{{[0-9]+}}, 0x7fff{{$}}
+define void @br_scc_sgt_i32_simm16_max(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+ %cmp0 = icmp sgt i32 %cond, 32767
+ br i1 %cmp0, label %endif, label %if
+
+if:
+ call void asm sideeffect "", ""()
+ br label %endif
+
+endif:
+ store volatile i32 1, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_sgt_i32_simm16_max_p1:
+; GCN: s_cmp_gt_i32 s{{[0-9]+}}, 0x8000{{$}}
+define void @br_scc_sgt_i32_simm16_max_p1(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+ %cmp0 = icmp sgt i32 %cond, 32768
+ br i1 %cmp0, label %endif, label %if
+
+if:
+ call void asm sideeffect "", ""()
+ br label %endif
+
+endif:
+ store volatile i32 1, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_sge_i32:
+; GCN: s_cmpk_ge_i32 s{{[0-9]+}}, 0x800{{$}}
+define void @br_scc_sge_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+ %size = call i32 @llvm.amdgcn.groupstaticsize()
+ %cmp0 = icmp sge i32 %cond, %size
+ br i1 %cmp0, label %endif, label %if
+
+if:
+ call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds)
+ br label %endif
+
+endif:
+ store volatile i32 1, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_slt_i32:
+; GCN: s_cmpk_lt_i32 s{{[0-9]+}}, 0x41{{$}}
+define void @br_scc_slt_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+ %cmp0 = icmp slt i32 %cond, 65
+ br i1 %cmp0, label %endif, label %if
+
+if:
+ call void asm sideeffect "", ""()
+ br label %endif
+
+endif:
+ store volatile i32 1, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_sle_i32:
+; GCN: s_cmpk_le_i32 s{{[0-9]+}}, 0x800{{$}}
+define void @br_scc_sle_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+ %size = call i32 @llvm.amdgcn.groupstaticsize()
+ %cmp0 = icmp sle i32 %cond, %size
+ br i1 %cmp0, label %endif, label %if
+
+if:
+ call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds)
+ br label %endif
+
+endif:
+ store volatile i32 1, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_ugt_i32:
+; GCN: s_cmpk_gt_u32 s{{[0-9]+}}, 0x800{{$}}
+define void @br_scc_ugt_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+ %size = call i32 @llvm.amdgcn.groupstaticsize()
+ %cmp0 = icmp ugt i32 %cond, %size
+ br i1 %cmp0, label %endif, label %if
+
+if:
+ call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds)
+ br label %endif
+
+endif:
+ store volatile i32 1, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_uge_i32:
+; GCN: s_cmpk_ge_u32 s{{[0-9]+}}, 0x800{{$}}
+define void @br_scc_uge_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+ %size = call i32 @llvm.amdgcn.groupstaticsize()
+ %cmp0 = icmp uge i32 %cond, %size
+ br i1 %cmp0, label %endif, label %if
+
+if:
+ call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds)
+ br label %endif
+
+endif:
+ store volatile i32 1, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_ult_i32:
+; GCN: s_cmpk_lt_u32 s{{[0-9]+}}, 0x41{{$}}
+define void @br_scc_ult_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+ %cmp0 = icmp ult i32 %cond, 65
+ br i1 %cmp0, label %endif, label %if
+
+if:
+ call void asm sideeffect "", ""()
+ br label %endif
+
+endif:
+ store volatile i32 1, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_ult_i32_min_simm16:
+; GCN: s_cmp_lt_u32 s2, 0xffff8000
+define void @br_scc_ult_i32_min_simm16(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+ %cmp0 = icmp ult i32 %cond, -32768
+ br i1 %cmp0, label %endif, label %if
+
+if:
+ call void asm sideeffect "", ""()
+ br label %endif
+
+endif:
+ store volatile i32 1, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_ult_i32_min_simm16_m1:
+; GCN: s_cmp_lt_u32 s{{[0-9]+}}, 0xffff7fff{{$}}
+define void @br_scc_ult_i32_min_simm16_m1(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+ %cmp0 = icmp ult i32 %cond, -32769
+ br i1 %cmp0, label %endif, label %if
+
+if:
+ call void asm sideeffect "", ""()
+ br label %endif
+
+endif:
+ store volatile i32 1, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_ule_i32:
+; GCN: s_cmpk_le_u32 s{{[0-9]+}}, 0x800{{$}}
+define void @br_scc_ule_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+ %size = call i32 @llvm.amdgcn.groupstaticsize()
+ %cmp0 = icmp ule i32 %cond, %size
+ br i1 %cmp0, label %endif, label %if
+
+if:
+ call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds)
+ br label %endif
+
+endif:
+ store volatile i32 1, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_br_scc_eq_i32:
+; GCN: s_cmpk_eq_i32 s{{[0-9]+}}, 0x800{{$}}
+define void @commute_br_scc_eq_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+ %size = call i32 @llvm.amdgcn.groupstaticsize()
+ %cmp0 = icmp eq i32 %size, %cond
+ br i1 %cmp0, label %endif, label %if
+
+if:
+ call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds)
+ br label %endif
+
+endif:
+ store volatile i32 1, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_br_scc_ne_i32:
+; GCN: s_cmpk_lg_i32 s{{[0-9]+}}, 0x800{{$}}
+define void @commute_br_scc_ne_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+ %size = call i32 @llvm.amdgcn.groupstaticsize()
+ %cmp0 = icmp ne i32 %size, %cond
+ br i1 %cmp0, label %endif, label %if
+
+if:
+ call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds)
+ br label %endif
+
+endif:
+ store volatile i32 1, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_br_scc_sgt_i32:
+; GCN: s_cmpk_lt_i32 s{{[0-9]+}}, 0x800{{$}}
+define void @commute_br_scc_sgt_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+ %size = call i32 @llvm.amdgcn.groupstaticsize()
+ %cmp0 = icmp sgt i32 %size, %cond
+ br i1 %cmp0, label %endif, label %if
+
+if:
+ call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds)
+ br label %endif
+
+endif:
+ store volatile i32 1, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_br_scc_sge_i32:
+; GCN: s_cmpk_le_i32 s{{[0-9]+}}, 0x800{{$}}
+define void @commute_br_scc_sge_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+ %size = call i32 @llvm.amdgcn.groupstaticsize()
+ %cmp0 = icmp sge i32 %size, %cond
+ br i1 %cmp0, label %endif, label %if
+
+if:
+ call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds)
+ br label %endif
+
+endif:
+ store volatile i32 1, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_br_scc_slt_i32:
+; GCN: s_cmpk_gt_i32 s{{[0-9]+}}, 0x800{{$}}
+define void @commute_br_scc_slt_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+ %size = call i32 @llvm.amdgcn.groupstaticsize()
+ %cmp0 = icmp slt i32 %size, %cond
+ br i1 %cmp0, label %endif, label %if
+
+if:
+ call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds)
+ br label %endif
+
+endif:
+ store volatile i32 1, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_br_scc_sle_i32:
+; GCN: s_cmpk_ge_i32 s{{[0-9]+}}, 0x800{{$}}
+define void @commute_br_scc_sle_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+ %size = call i32 @llvm.amdgcn.groupstaticsize()
+ %cmp0 = icmp sle i32 %size, %cond
+ br i1 %cmp0, label %endif, label %if
+
+if:
+ call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds)
+ br label %endif
+
+endif:
+ store volatile i32 1, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_br_scc_ugt_i32:
+; GCN: s_cmpk_lt_u32 s{{[0-9]+}}, 0x800{{$}}
+define void @commute_br_scc_ugt_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+ %size = call i32 @llvm.amdgcn.groupstaticsize()
+ %cmp0 = icmp ugt i32 %size, %cond
+ br i1 %cmp0, label %endif, label %if
+
+if:
+ call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds)
+ br label %endif
+
+endif:
+ store volatile i32 1, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_br_scc_uge_i32:
+; GCN: s_cmpk_le_u32 s{{[0-9]+}}, 0x800{{$}}
+define void @commute_br_scc_uge_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+ %size = call i32 @llvm.amdgcn.groupstaticsize()
+ %cmp0 = icmp uge i32 %size, %cond
+ br i1 %cmp0, label %endif, label %if
+
+if:
+ call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds)
+ br label %endif
+
+endif:
+ store volatile i32 1, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_br_scc_ult_i32:
+; GCN: s_cmpk_gt_u32 s{{[0-9]+}}, 0x800{{$}}
+define void @commute_br_scc_ult_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+ %size = call i32 @llvm.amdgcn.groupstaticsize()
+ %cmp0 = icmp ult i32 %size, %cond
+ br i1 %cmp0, label %endif, label %if
+
+if:
+ call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds)
+ br label %endif
+
+endif:
+ store volatile i32 1, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_br_scc_ule_i32:
+; GCN: s_cmpk_ge_u32 s{{[0-9]+}}, 0x800{{$}}
+define void @commute_br_scc_ule_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+ %size = call i32 @llvm.amdgcn.groupstaticsize()
+ %cmp0 = icmp ule i32 %size, %cond
+ br i1 %cmp0, label %endif, label %if
+
+if:
+ call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds)
+ br label %endif
+
+endif:
+ store volatile i32 1, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_ult_i32_non_u16:
+; GCN: s_cmp_lt_u32 s2, 0xfffff7ff
+define void @br_scc_ult_i32_non_u16(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+ %size = call i32 @llvm.amdgcn.groupstaticsize()
+ %not.size = xor i32 %size, -1
+ %cmp0 = icmp ult i32 %cond, %not.size
+ br i1 %cmp0, label %endif, label %if
+
+if:
+ call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds)
+ br label %endif
+
+endif:
+ store volatile i32 1, i32 addrspace(1)* %out
+ ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
More information about the llvm-commits
mailing list