[llvm] r281513 - AMDGPU: Use SOPK compare instructions

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 14 11:03:54 PDT 2016


Author: arsenm
Date: Wed Sep 14 13:03:53 2016
New Revision: 281513

URL: http://llvm.org/viewvc/llvm-project?rev=281513&view=rev
Log:
AMDGPU: Use SOPK compare instructions

Added:
    llvm/trunk/test/CodeGen/AMDGPU/sopk-compares.ll
Modified:
    llvm/trunk/lib/Target/AMDGPU/SIDefines.h
    llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td
    llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h
    llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
    llvm/trunk/lib/Target/AMDGPU/SIShrinkInstructions.cpp
    llvm/trunk/lib/Target/AMDGPU/SOPInstructions.td
    llvm/trunk/test/CodeGen/AMDGPU/si-annotate-cf.ll

Modified: llvm/trunk/lib/Target/AMDGPU/SIDefines.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIDefines.h?rev=281513&r1=281512&r2=281513&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIDefines.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIDefines.h Wed Sep 14 13:03:53 2016
@@ -43,7 +43,8 @@ enum {
   SGPRSpill = 1 << 24,
   VOPAsmPrefer32Bit = 1 << 25,
   Gather4 = 1 << 26,
-  DisableWQM = 1 << 27
+  DisableWQM = 1 << 27,
+  SOPK_ZEXT = 1 << 28
 };
 }
 

Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td?rev=281513&r1=281512&r2=281513&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td Wed Sep 14 13:03:53 2016
@@ -56,6 +56,10 @@ class InstSI <dag outs, dag ins, string
   // Whether WQM _must_ be disabled for this instruction.
   field bits<1> DisableWQM = 0;
 
+  // Most SOPK instructions treat the immediate as a signed 16-bit value;
+  // however, some treat it as unsigned.
+  field bits<1> SOPKZext = 0;
+
   // These need to be kept in sync with the enum in SIInstrFlags.
   let TSFlags{0} = VM_CNT;
   let TSFlags{1} = EXP_CNT;
@@ -89,6 +93,7 @@ class InstSI <dag outs, dag ins, string
   let TSFlags{25} = VOPAsmPrefer32Bit;
   let TSFlags{26} = Gather4;
   let TSFlags{27} = DisableWQM;
+  let TSFlags{28} = SOPKZext;
 
   let SchedRW = [Write32Bit];
 

Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h?rev=281513&r1=281512&r2=281513&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h Wed Sep 14 13:03:53 2016
@@ -391,6 +391,14 @@ public:
     return MI.getDesc().TSFlags & SIInstrFlags::VM_CNT;
   }
 
+  static bool sopkIsZext(const MachineInstr &MI) {
+    return MI.getDesc().TSFlags & SIInstrFlags::SOPK_ZEXT;
+  }
+
+  bool sopkIsZext(uint16_t Opcode) const {
+    return get(Opcode).TSFlags & SIInstrFlags::SOPK_ZEXT;
+  }
+
   bool isVGPRCopy(const MachineInstr &MI) const {
     assert(MI.isCopy());
     unsigned Dest = MI.getOperand(0).getReg();
@@ -603,6 +611,9 @@ namespace AMDGPU {
   LLVM_READONLY
   int getAtomicNoRetOp(uint16_t Opcode);
 
+  LLVM_READONLY
+  int getSOPKOp(uint16_t Opcode);
+
   const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;
   const uint64_t RSRC_ELEMENT_SIZE_SHIFT = (32 + 19);
   const uint64_t RSRC_INDEX_STRIDE_SHIFT = (32 + 21);

Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td?rev=281513&r1=281512&r2=281513&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td Wed Sep 14 13:03:53 2016
@@ -1193,7 +1193,7 @@ class VOP <string opName> {
   string OpName = opName;
 }
 
-class VOP2_REV <string revOp, bit isOrig> {
+class Commutable_REV <string revOp, bit isOrig> {
   string RevOp = revOp;
   bit IsOrig = isOrig;
 }
@@ -1325,7 +1325,7 @@ multiclass VOP2SI_m <vop2 op, string opN
                      string revOp> {
 
   def "" : VOP2_Pseudo <p.Outs32, p.Ins32, pattern, opName>,
-           VOP2_REV<revOp#"_e32", !eq(revOp, opName)>;
+           Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;
 
   def _si : VOP2_Real_si <opName, op, p.Outs32, p.Ins32, p.Asm32>;
 }
@@ -1334,7 +1334,7 @@ multiclass VOP2_m <vop2 op, string opNam
                    string revOp> {
 
   def "" : VOP2_Pseudo <p.Outs32, p.Ins32, pattern, opName>,
-           VOP2_REV<revOp#"_e32", !eq(revOp, opName)>;
+           Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;
 
   def _si : VOP2_Real_si <opName, op, p.Outs32, p.Ins32, p.Asm32>;
 
@@ -1523,7 +1523,7 @@ multiclass VOP3_2_m <vop op, dag outs, d
                      bit HasMods = 1> {
 
   def "" : VOP3_Pseudo <outs, ins, pattern, opName, HasMods>,
-           VOP2_REV<revOp#"_e64", !eq(revOp, opName)>;
+           Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;
 
   def _si : VOP3_Real_si <op.SI3, outs, ins, asm, opName, HasMods>,
             VOP3DisableFields<1, 0, HasMods>;
@@ -1537,7 +1537,7 @@ multiclass VOP3SI_2_m <vop op, dag outs,
                      bit HasMods = 1> {
 
   def "" : VOP3_Pseudo <outs, ins, pattern, opName, HasMods>,
-           VOP2_REV<revOp#"_e64", !eq(revOp, opName)>;
+           Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;
 
   def _si : VOP3_Real_si <op.SI3, outs, ins, asm, opName, HasMods>,
             VOP3DisableFields<1, 0, HasMods>;
@@ -1578,7 +1578,7 @@ multiclass VOP3_C_m <vop op, dag outs, d
                      string revOp, list<SchedReadWrite> sched> {
 
   def "" : VOP3_Pseudo <outs, ins, pattern, opName, HasMods>,
-           VOP2_REV<revOp#"_e64", !eq(revOp, opName)> {
+           Commutable_REV<revOp#"_e64", !eq(revOp, opName)> {
     let Defs = !if(defExec, [EXEC], []);
     let SchedRW = sched;
   }
@@ -1829,7 +1829,7 @@ multiclass VOPC_m <vopc op, dag ins, str
                    string revOpName = "", string asm = opName#"_e32 "#op_asm,
                    string alias_asm = opName#" "#op_asm> {
   def "" : VOPC_Pseudo <ins, pattern, opName>,
-           VOP2_REV<revOpName#"_e32", !eq(revOpName, opName)> {
+           Commutable_REV<revOpName#"_e32", !eq(revOpName, opName)> {
     let Defs = !if(DefExec, [VCC, EXEC], [VCC]);
     let SchedRW = sched;
     let isConvergent = DefExec;
@@ -2106,7 +2106,7 @@ def getMaskedMIMGOp : InstrMapping {
 
 // Maps an commuted opcode to its original version
 def getCommuteOrig : InstrMapping {
-  let FilterClass = "VOP2_REV";
+  let FilterClass = "Commutable_REV";
   let RowFields = ["RevOp"];
   let ColFields = ["IsOrig"];
   let KeyCol = ["0"];
@@ -2115,31 +2115,13 @@ def getCommuteOrig : InstrMapping {
 
 // Maps an original opcode to its commuted version
 def getCommuteRev : InstrMapping {
-  let FilterClass = "VOP2_REV";
+  let FilterClass = "Commutable_REV";
   let RowFields = ["RevOp"];
   let ColFields = ["IsOrig"];
   let KeyCol = ["1"];
   let ValueCols = [["0"]];
 }
 
-def getCommuteCmpOrig : InstrMapping {
-  let FilterClass = "VOP2_REV";
-  let RowFields = ["RevOp"];
-  let ColFields = ["IsOrig"];
-  let KeyCol = ["0"];
-  let ValueCols = [["1"]];
-}
-
-// Maps an original opcode to its commuted version
-def getCommuteCmpRev : InstrMapping {
-  let FilterClass = "VOP2_REV";
-  let RowFields = ["RevOp"];
-  let ColFields = ["IsOrig"];
-  let KeyCol = ["1"];
-  let ValueCols = [["0"]];
-}
-
-
 def getMCOpcodeGen : InstrMapping {
   let FilterClass = "SIMCInstr";
   let RowFields = ["PseudoInstr"];
@@ -2149,6 +2131,15 @@ def getMCOpcodeGen : InstrMapping {
                    [!cast<string>(SIEncodingFamily.VI)]];
 }
 
+// Get equivalent SOPK instruction.
+def getSOPKOp : InstrMapping {
+  let FilterClass = "SOPKInstTable";
+  let RowFields = ["BaseCmpOp"];
+  let ColFields = ["IsSOPK"];
+  let KeyCol = ["0"];
+  let ValueCols = [["1"]];
+}
+
 def getAddr64Inst : InstrMapping {
   let FilterClass = "MUBUFAddr64Table";
   let RowFields = ["OpName"];

Modified: llvm/trunk/lib/Target/AMDGPU/SIShrinkInstructions.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIShrinkInstructions.cpp?rev=281513&r1=281512&r2=281513&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIShrinkInstructions.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIShrinkInstructions.cpp Wed Sep 14 13:03:53 2016
@@ -188,6 +188,26 @@ static bool isKImmOperand(const SIInstrI
   return isInt<16>(Src.getImm()) && !TII->isInlineConstant(Src, 4);
 }
 
+static bool isKUImmOperand(const SIInstrInfo *TII, const MachineOperand &Src) {
+  return isUInt<16>(Src.getImm()) && !TII->isInlineConstant(Src, 4);
+}
+
+/// Classify \p Src as a 16-bit literal. Returns true when the value fits in 16
+/// bits, signed or unsigned, and TII->isInlineConstant(Src, 4) is false.
+/// \p IsUnsigned is written only when the value fits; it is true iff the value
+/// is representable as an unsigned but not as a signed 16-bit integer.
+static bool isKImmOrKUImmOperand(const SIInstrInfo *TII,
+                                 const MachineOperand &Src,
+                                 bool &IsUnsigned) {
+  const bool FitsSigned = isInt<16>(Src.getImm());
+  if (!FitsSigned && !isUInt<16>(Src.getImm()))
+    return false;
+
+  // The signed interpretation wins whenever both would work.
+  IsUnsigned = !FitsSigned;
+  return !TII->isInlineConstant(Src, 4);
+}
+
 /// Copy implicit register operands from specified instruction to this
 /// instruction that are not part of the instruction definition.
 static void copyExtraImplicitOps(MachineInstr &NewMI, MachineFunction &MF,
@@ -202,6 +222,42 @@ static void copyExtraImplicitOps(Machine
   }
 }
 
+/// Rewrite the SOPC compare \p MI to its s_cmpk_* form when the immediate fits
+/// the 16-bit literal. Opcodes flagged SOPK_ZEXT zero-extend that literal, so a
+/// negative value such as -32768 must not be shrunk to s_cmpk_*_u32 0x8000.
+static void shrinkScalarCompare(const SIInstrInfo *TII, MachineInstr &MI) {
+  // cmpk instructions do scc = dst <cc op> imm16, so commute the instruction to
+  // get constants on the RHS.
+  if (!MI.getOperand(0).isReg())
+    TII->commuteInstruction(MI, false, 0, 1);
+
+  const MachineOperand &Src1 = MI.getOperand(1);
+  if (!Src1.isImm())
+    return;
+
+  int SOPKOpc = AMDGPU::getSOPKOp(MI.getOpcode());
+  if (SOPKOpc == -1)
+    return;
+
+  // eq/ne is special because the imm16 can be treated as signed or unsigned.
+  if (SOPKOpc == AMDGPU::S_CMPK_EQ_I32 || SOPKOpc == AMDGPU::S_CMPK_LG_I32) {
+    bool HasUImm;
+    if (isKImmOrKUImmOperand(TII, Src1, HasUImm)) {
+      if (HasUImm)
+        SOPKOpc = (SOPKOpc == AMDGPU::S_CMPK_EQ_I32) ?
+          AMDGPU::S_CMPK_EQ_U32 : AMDGPU::S_CMPK_LG_U32;
+      MI.setDesc(TII->get(SOPKOpc));
+    }
+    return;
+  }
+
+  // Zero-extending opcodes need a uimm16; all others need a simm16.
+  const bool Fits = TII->sopkIsZext(SOPKOpc) ? isKUImmOperand(TII, Src1)
+                                             : isKImmOperand(TII, Src1);
+  if (Fits)
+    MI.setDesc(TII->get(SOPKOpc));
+}
+
 bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
   if (skipFunction(*MF.getFunction()))
     return false;
@@ -310,6 +366,12 @@ bool SIShrinkInstructions::runOnMachineF
         }
       }
 
+      // Try to use s_cmpk_*
+      if (MI.isCompare() && TII->isSOPC(MI)) {
+        shrinkScalarCompare(TII, MI);
+        continue;
+      }
+
       // Try to use S_MOVK_I32, which will save 4 bytes for small immediates.
       if (MI.getOpcode() == AMDGPU::S_MOV_B32) {
         const MachineOperand &Src = MI.getOperand(1);

Modified: llvm/trunk/lib/Target/AMDGPU/SOPInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SOPInstructions.td?rev=281513&r1=281512&r2=281513&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SOPInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SOPInstructions.td Wed Sep 14 13:03:53 2016
@@ -473,6 +473,11 @@ class SOPK_Real64<bits<5> op, SOPK_Pseud
   let Inst{63-32} = imm;
 }
 
+class SOPKInstTable <bit is_sopk, string cmpOp = ""> {
+  bit IsSOPK = is_sopk;
+  string BaseCmpOp = cmpOp;
+}
+
 class SOPK_32 <string opName, list<dag> pattern=[]> : SOPK_Pseudo <
   opName,
   (outs SReg_32:$sdst),
@@ -480,12 +485,12 @@ class SOPK_32 <string opName, list<dag>
   "$sdst, $simm16",
   pattern>;
 
-class SOPK_SCC <string opName, list<dag> pattern=[]> : SOPK_Pseudo <
+class SOPK_SCC <string opName, string base_op = ""> : SOPK_Pseudo <
   opName,
   (outs),
   (ins SReg_32:$sdst, u16imm:$simm16),
-  "$sdst, $simm16",
-  pattern> {
+  "$sdst, $simm16", []>,
+  SOPKInstTable<1, base_op>{
   let Defs = [SCC];
 }
 
@@ -521,18 +526,21 @@ let isCompare = 1 in {
 //   [(set i1:$dst, (setcc i32:$src0, imm:$src1, SETEQ))]
 // >;
 
-def S_CMPK_EQ_I32 : SOPK_SCC <"s_cmpk_eq_i32">;
-def S_CMPK_LG_I32 : SOPK_SCC <"s_cmpk_lg_i32">;
-def S_CMPK_GT_I32 : SOPK_SCC <"s_cmpk_gt_i32">;
-def S_CMPK_GE_I32 : SOPK_SCC <"s_cmpk_ge_i32">;
-def S_CMPK_LT_I32 : SOPK_SCC <"s_cmpk_lt_i32">;
-def S_CMPK_LE_I32 : SOPK_SCC <"s_cmpk_le_i32">;
-def S_CMPK_EQ_U32 : SOPK_SCC <"s_cmpk_eq_u32">;
-def S_CMPK_LG_U32 : SOPK_SCC <"s_cmpk_lg_u32">;
-def S_CMPK_GT_U32 : SOPK_SCC <"s_cmpk_gt_u32">;
-def S_CMPK_GE_U32 : SOPK_SCC <"s_cmpk_ge_u32">;
-def S_CMPK_LT_U32 : SOPK_SCC <"s_cmpk_lt_u32">;
-def S_CMPK_LE_U32 : SOPK_SCC <"s_cmpk_le_u32">;
+def S_CMPK_EQ_I32 : SOPK_SCC <"s_cmpk_eq_i32", "s_cmp_eq_i32">;
+def S_CMPK_LG_I32 : SOPK_SCC <"s_cmpk_lg_i32", "s_cmp_lg_i32">;
+def S_CMPK_GT_I32 : SOPK_SCC <"s_cmpk_gt_i32", "s_cmp_gt_i32">;
+def S_CMPK_GE_I32 : SOPK_SCC <"s_cmpk_ge_i32", "s_cmp_ge_i32">;
+def S_CMPK_LT_I32 : SOPK_SCC <"s_cmpk_lt_i32", "s_cmp_lt_i32">;
+def S_CMPK_LE_I32 : SOPK_SCC <"s_cmpk_le_i32", "s_cmp_le_i32">;
+
+let SOPKZext = 1 in {
+def S_CMPK_EQ_U32 : SOPK_SCC <"s_cmpk_eq_u32", "s_cmp_eq_u32">;
+def S_CMPK_LG_U32 : SOPK_SCC <"s_cmpk_lg_u32", "s_cmp_lg_u32">;
+def S_CMPK_GT_U32 : SOPK_SCC <"s_cmpk_gt_u32", "s_cmp_gt_u32">;
+def S_CMPK_GE_U32 : SOPK_SCC <"s_cmpk_ge_u32", "s_cmp_ge_u32">;
+def S_CMPK_LT_U32 : SOPK_SCC <"s_cmpk_lt_u32", "s_cmp_lt_u32">;
+def S_CMPK_LE_U32 : SOPK_SCC <"s_cmpk_le_u32", "s_cmp_le_u32">;
+} // End SOPKZext = 1
 } // End isCompare = 1
 
 let Defs = [SCC], isCommutable = 1, DisableEncoding = "$src0",
@@ -613,8 +621,14 @@ class SOPC_Helper <bits<7> op, RegisterO
   [(set SCC, (si_setcc_uniform vt:$src0, vt:$src1, cond))] > {
 }
 
-class SOPC_CMP_32<bits<7> op, string opName, PatLeaf cond = COND_NULL>
-  : SOPC_Helper<op, SSrc_b32, i32, opName, cond>;
+class SOPC_CMP_32<bits<7> op, string opName,
+                  PatLeaf cond = COND_NULL, string revOp = opName>
+  : SOPC_Helper<op, SSrc_b32, i32, opName, cond>,
+    Commutable_REV<revOp, !eq(revOp, opName)>,
+    SOPKInstTable<0, opName> {
+  let isCompare = 1;
+  let isCommutable = 1;
+}
 
 class SOPC_32<bits<7> op, string opName, list<dag> pattern = []>
   : SOPC_Base<op, SSrc_b32, SSrc_b32, opName, pattern>;
@@ -622,19 +636,19 @@ class SOPC_32<bits<7> op, string opName,
 class SOPC_64_32<bits<7> op, string opName, list<dag> pattern = []>
   : SOPC_Base<op, SSrc_b64, SSrc_b32, opName, pattern>;
 
-
 def S_CMP_EQ_I32 : SOPC_CMP_32 <0x00, "s_cmp_eq_i32", COND_EQ>;
 def S_CMP_LG_I32 : SOPC_CMP_32 <0x01, "s_cmp_lg_i32", COND_NE>;
 def S_CMP_GT_I32 : SOPC_CMP_32 <0x02, "s_cmp_gt_i32", COND_SGT>;
 def S_CMP_GE_I32 : SOPC_CMP_32 <0x03, "s_cmp_ge_i32", COND_SGE>;
-def S_CMP_LT_I32 : SOPC_CMP_32 <0x04, "s_cmp_lt_i32", COND_SLT>;
-def S_CMP_LE_I32 : SOPC_CMP_32 <0x05, "s_cmp_le_i32", COND_SLE>;
+def S_CMP_LT_I32 : SOPC_CMP_32 <0x04, "s_cmp_lt_i32", COND_SLT, "s_cmp_gt_i32">;
+def S_CMP_LE_I32 : SOPC_CMP_32 <0x05, "s_cmp_le_i32", COND_SLE, "s_cmp_ge_i32">;
 def S_CMP_EQ_U32 : SOPC_CMP_32 <0x06, "s_cmp_eq_u32", COND_EQ>;
-def S_CMP_LG_U32 : SOPC_CMP_32 <0x07, "s_cmp_lg_u32", COND_NE >;
+def S_CMP_LG_U32 : SOPC_CMP_32 <0x07, "s_cmp_lg_u32", COND_NE>;
 def S_CMP_GT_U32 : SOPC_CMP_32 <0x08, "s_cmp_gt_u32", COND_UGT>;
 def S_CMP_GE_U32 : SOPC_CMP_32 <0x09, "s_cmp_ge_u32", COND_UGE>;
-def S_CMP_LT_U32 : SOPC_CMP_32 <0x0a, "s_cmp_lt_u32", COND_ULT>;
-def S_CMP_LE_U32 : SOPC_CMP_32 <0x0b, "s_cmp_le_u32", COND_ULE>;
+def S_CMP_LT_U32 : SOPC_CMP_32 <0x0a, "s_cmp_lt_u32", COND_ULT, "s_cmp_gt_u32">;
+def S_CMP_LE_U32 : SOPC_CMP_32 <0x0b, "s_cmp_le_u32", COND_ULE, "s_cmp_ge_u32">;
+
 def S_BITCMP0_B32 : SOPC_32 <0x0c, "s_bitcmp0_b32">;
 def S_BITCMP1_B32 : SOPC_32 <0x0d, "s_bitcmp1_b32">;
 def S_BITCMP0_B64 : SOPC_64_32 <0x0e, "s_bitcmp0_b64">;

Modified: llvm/trunk/test/CodeGen/AMDGPU/si-annotate-cf.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/si-annotate-cf.ll?rev=281513&r1=281512&r2=281513&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/si-annotate-cf.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/si-annotate-cf.ll Wed Sep 14 13:03:53 2016
@@ -92,7 +92,7 @@ declare float @llvm.fabs.f32(float) noun
 ; SI: s_cmp_gt_i32
 ; SI-NEXT: s_cbranch_scc0 [[ENDPGM:BB[0-9]+_[0-9]+]]
 
-; SI: s_cmp_gt_i32
+; SI: s_cmpk_gt_i32
 ; SI-NEXT: s_cbranch_scc1 [[ENDPGM]]
 
 ; SI: [[INFLOOP:BB[0-9]+_[0-9]+]]

Added: llvm/trunk/test/CodeGen/AMDGPU/sopk-compares.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sopk-compares.ll?rev=281513&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sopk-compares.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/sopk-compares.ll Wed Sep 14 13:03:53 2016
@@ -0,0 +1,555 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
+
+; Since this intrinsic is exposed as a constant after isel, use it to
+; defeat the DAG's compare with constant canonicalizations.
+declare i32 @llvm.amdgcn.groupstaticsize() #1
+
+@lds = addrspace(3) global [512 x i32] undef, align 4 ; 512 x i32 = 2048 (0x800) bytes
+
+; GCN-LABEL: {{^}}br_scc_eq_i32_inline_imm:
+; GCN: s_cmp_eq_i32 s{{[0-9]+}}, 4{{$}}
+define void @br_scc_eq_i32_inline_imm(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+  %cmp0 = icmp eq i32 %cond, 4
+  br i1 %cmp0, label %endif, label %if
+
+if:
+  call void asm sideeffect "", ""()
+  br label %endif
+
+endif:
+  store volatile i32 1, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_eq_i32_simm16_max:
+; GCN: s_cmpk_eq_i32 s{{[0-9]+}}, 0x7fff{{$}}
+define void @br_scc_eq_i32_simm16_max(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+  %cmp0 = icmp eq i32 %cond, 32767
+  br i1 %cmp0, label %endif, label %if
+
+if:
+  call void asm sideeffect "", ""()
+  br label %endif
+
+endif:
+  store volatile i32 1, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_eq_i32_simm16_max_p1:
+; GCN: s_cmpk_eq_u32 s{{[0-9]+}}, 0x8000{{$}}
+define void @br_scc_eq_i32_simm16_max_p1(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+  %cmp0 = icmp eq i32 %cond, 32768
+  br i1 %cmp0, label %endif, label %if
+
+if:
+  call void asm sideeffect "", ""()
+  br label %endif
+
+endif:
+  store volatile i32 1, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_ne_i32_simm16_max_p1:
+; GCN: s_cmpk_lg_u32 s{{[0-9]+}}, 0x8000{{$}}
+define void @br_scc_ne_i32_simm16_max_p1(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+  %cmp0 = icmp ne i32 %cond, 32768
+  br i1 %cmp0, label %endif, label %if
+
+if:
+  call void asm sideeffect "", ""()
+  br label %endif
+
+endif:
+  store volatile i32 1, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_eq_i32_simm16_min:
+; GCN: s_cmpk_eq_i32 s{{[0-9]+}}, 0x8000{{$}}
+define void @br_scc_eq_i32_simm16_min(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+  %cmp0 = icmp eq i32 %cond, -32768
+  br i1 %cmp0, label %endif, label %if
+
+if:
+  call void asm sideeffect "", ""()
+  br label %endif
+
+endif:
+  store volatile i32 1, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_eq_i32_simm16_min_m1:
+; GCN: s_cmp_eq_i32 s{{[0-9]+}}, 0xffff7fff{{$}}
+define void @br_scc_eq_i32_simm16_min_m1(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+  %cmp0 = icmp eq i32 %cond, -32769
+  br i1 %cmp0, label %endif, label %if
+
+if:
+  call void asm sideeffect "", ""()
+  br label %endif
+
+endif:
+  store volatile i32 1, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_eq_i32_uimm15_max:
+; GCN: s_cmpk_eq_u32 s{{[0-9]+}}, 0xffff{{$}}
+define void @br_scc_eq_i32_uimm15_max(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+  %cmp0 = icmp eq i32 %cond, 65535
+  br i1 %cmp0, label %endif, label %if
+
+if:
+  call void asm sideeffect "", ""()
+  br label %endif
+
+endif:
+  store volatile i32 1, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_eq_i32_uimm16_max:
+; GCN: s_cmpk_eq_u32 s{{[0-9]+}}, 0xffff{{$}}
+define void @br_scc_eq_i32_uimm16_max(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+  %cmp0 = icmp eq i32 %cond, 65535
+  br i1 %cmp0, label %endif, label %if
+
+if:
+  call void asm sideeffect "", ""()
+  br label %endif
+
+endif:
+  store volatile i32 1, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_eq_i32_uimm16_max_p1:
+; GCN: s_cmp_eq_i32 s{{[0-9]+}}, 0x10000{{$}}
+define void @br_scc_eq_i32_uimm16_max_p1(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+  %cmp0 = icmp eq i32 %cond, 65536
+  br i1 %cmp0, label %endif, label %if
+
+if:
+  call void asm sideeffect "", ""()
+  br label %endif
+
+endif:
+  store volatile i32 1, i32 addrspace(1)* %out
+  ret void
+}
+
+
+; GCN-LABEL: {{^}}br_scc_eq_i32:
+; GCN: s_cmpk_eq_i32 s{{[0-9]+}}, 0x41{{$}}
+define void @br_scc_eq_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+  %cmp0 = icmp eq i32 %cond, 65
+  br i1 %cmp0, label %endif, label %if
+
+if:
+  call void asm sideeffect "", ""()
+  br label %endif
+
+endif:
+  store volatile i32 1, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_ne_i32:
+; GCN: s_cmpk_lg_i32 s{{[0-9]+}}, 0x41{{$}}
+define void @br_scc_ne_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+  %cmp0 = icmp ne i32 %cond, 65
+  br i1 %cmp0, label %endif, label %if
+
+if:
+  call void asm sideeffect "", ""()
+  br label %endif
+
+endif:
+  store volatile i32 1, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_sgt_i32:
+; GCN: s_cmpk_gt_i32 s{{[0-9]+}}, 0x41{{$}}
+define void @br_scc_sgt_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+  %cmp0 = icmp sgt i32 %cond, 65
+  br i1 %cmp0, label %endif, label %if
+
+if:
+  call void asm sideeffect "", ""()
+  br label %endif
+
+endif:
+  store volatile i32 1, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_sgt_i32_simm16_max:
+; GCN: s_cmpk_gt_i32 s{{[0-9]+}}, 0x7fff{{$}}
+define void @br_scc_sgt_i32_simm16_max(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+  %cmp0 = icmp sgt i32 %cond, 32767
+  br i1 %cmp0, label %endif, label %if
+
+if:
+  call void asm sideeffect "", ""()
+  br label %endif
+
+endif:
+  store volatile i32 1, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_sgt_i32_simm16_max_p1:
+; GCN: s_cmp_gt_i32 s{{[0-9]+}}, 0x8000{{$}}
+define void @br_scc_sgt_i32_simm16_max_p1(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+  %cmp0 = icmp sgt i32 %cond, 32768
+  br i1 %cmp0, label %endif, label %if
+
+if:
+  call void asm sideeffect "", ""()
+  br label %endif
+
+endif:
+  store volatile i32 1, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_sge_i32:
+; GCN: s_cmpk_ge_i32 s{{[0-9]+}}, 0x800{{$}}
+define void @br_scc_sge_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+  %size = call i32 @llvm.amdgcn.groupstaticsize()
+  %cmp0 = icmp sge i32 %cond, %size
+  br i1 %cmp0, label %endif, label %if
+
+if:
+  call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds)
+  br label %endif
+
+endif:
+  store volatile i32 1, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_slt_i32:
+; GCN: s_cmpk_lt_i32 s{{[0-9]+}}, 0x41{{$}}
+define void @br_scc_slt_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+  %cmp0 = icmp slt i32 %cond, 65
+  br i1 %cmp0, label %endif, label %if
+
+if:
+  call void asm sideeffect "", ""()
+  br label %endif
+
+endif:
+  store volatile i32 1, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_sle_i32:
+; GCN: s_cmpk_le_i32 s{{[0-9]+}}, 0x800{{$}}
+define void @br_scc_sle_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+  %size = call i32 @llvm.amdgcn.groupstaticsize()
+  %cmp0 = icmp sle i32 %cond, %size
+  br i1 %cmp0, label %endif, label %if
+
+if:
+  call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds)
+  br label %endif
+
+endif:
+  store volatile i32 1, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_ugt_i32:
+; GCN: s_cmpk_gt_u32 s{{[0-9]+}}, 0x800{{$}}
+define void @br_scc_ugt_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+  %size = call i32 @llvm.amdgcn.groupstaticsize()
+  %cmp0 = icmp ugt i32 %cond, %size
+  br i1 %cmp0, label %endif, label %if
+
+if:
+  call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds)
+  br label %endif
+
+endif:
+  store volatile i32 1, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_uge_i32:
+; GCN: s_cmpk_ge_u32 s{{[0-9]+}}, 0x800{{$}}
+define void @br_scc_uge_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+  %size = call i32 @llvm.amdgcn.groupstaticsize()
+  %cmp0 = icmp uge i32 %cond, %size
+  br i1 %cmp0, label %endif, label %if
+
+if:
+  call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds)
+  br label %endif
+
+endif:
+  store volatile i32 1, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_ult_i32:
+; GCN: s_cmpk_lt_u32 s{{[0-9]+}}, 0x41{{$}}
+define void @br_scc_ult_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+  %cmp0 = icmp ult i32 %cond, 65
+  br i1 %cmp0, label %endif, label %if
+
+if:
+  call void asm sideeffect "", ""()
+  br label %endif
+
+endif:
+  store volatile i32 1, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_ult_i32_min_simm16:
+; GCN: s_cmp_lt_u32 s{{[0-9]+}}, 0xffff8000{{$}}
+define void @br_scc_ult_i32_min_simm16(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+  %cmp0 = icmp ult i32 %cond, -32768 ; 0xffff8000 unsigned: not a valid uimm16
+  br i1 %cmp0, label %endif, label %if
+
+if:
+  call void asm sideeffect "", ""()
+  br label %endif
+
+endif:
+  store volatile i32 1, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_ult_i32_min_simm16_m1:
+; GCN: s_cmp_lt_u32 s{{[0-9]+}}, 0xffff7fff{{$}}
+define void @br_scc_ult_i32_min_simm16_m1(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+  %cmp0 = icmp ult i32 %cond, -32769
+  br i1 %cmp0, label %endif, label %if
+
+if:
+  call void asm sideeffect "", ""()
+  br label %endif
+
+endif:
+  store volatile i32 1, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_ule_i32:
+; GCN: s_cmpk_le_u32 s{{[0-9]+}}, 0x800{{$}}
+define void @br_scc_ule_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+  %size = call i32 @llvm.amdgcn.groupstaticsize()
+  %cmp0 = icmp ule i32 %cond, %size
+  br i1 %cmp0, label %endif, label %if
+
+if:
+  call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds)
+  br label %endif
+
+endif:
+  store volatile i32 1, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}commute_br_scc_eq_i32:
+; GCN: s_cmpk_eq_i32 s{{[0-9]+}}, 0x800{{$}}
+define void @commute_br_scc_eq_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+  %size = call i32 @llvm.amdgcn.groupstaticsize()
+  %cmp0 = icmp eq i32 %size, %cond
+  br i1 %cmp0, label %endif, label %if
+
+if:
+  call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds)
+  br label %endif
+
+endif:
+  store volatile i32 1, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}commute_br_scc_ne_i32:
+; GCN: s_cmpk_lg_i32 s{{[0-9]+}}, 0x800{{$}}
+define void @commute_br_scc_ne_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+  %size = call i32 @llvm.amdgcn.groupstaticsize()
+  %cmp0 = icmp ne i32 %size, %cond
+  br i1 %cmp0, label %endif, label %if
+
+if:
+  call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds)
+  br label %endif
+
+endif:
+  store volatile i32 1, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}commute_br_scc_sgt_i32:
+; GCN: s_cmpk_lt_i32 s{{[0-9]+}}, 0x800{{$}}
+define void @commute_br_scc_sgt_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+  %size = call i32 @llvm.amdgcn.groupstaticsize()
+  %cmp0 = icmp sgt i32 %size, %cond
+  br i1 %cmp0, label %endif, label %if
+
+if:
+  call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds)
+  br label %endif
+
+endif:
+  store volatile i32 1, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}commute_br_scc_sge_i32:
+; GCN: s_cmpk_le_i32 s{{[0-9]+}}, 0x800{{$}}
+define void @commute_br_scc_sge_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+  %size = call i32 @llvm.amdgcn.groupstaticsize()
+  %cmp0 = icmp sge i32 %size, %cond
+  br i1 %cmp0, label %endif, label %if
+
+if:
+  call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds)
+  br label %endif
+
+endif:
+  store volatile i32 1, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}commute_br_scc_slt_i32:
+; GCN: s_cmpk_gt_i32 s{{[0-9]+}}, 0x800{{$}}
+define void @commute_br_scc_slt_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+  %size = call i32 @llvm.amdgcn.groupstaticsize()
+  %cmp0 = icmp slt i32 %size, %cond
+  br i1 %cmp0, label %endif, label %if
+
+if:
+  call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds)
+  br label %endif
+
+endif:
+  store volatile i32 1, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}commute_br_scc_sle_i32:
+; GCN: s_cmpk_ge_i32 s{{[0-9]+}}, 0x800{{$}}
+define void @commute_br_scc_sle_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+  %size = call i32 @llvm.amdgcn.groupstaticsize()
+  %cmp0 = icmp sle i32 %size, %cond
+  br i1 %cmp0, label %endif, label %if
+
+if:
+  call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds)
+  br label %endif
+
+endif:
+  store volatile i32 1, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}commute_br_scc_ugt_i32:
+; GCN: s_cmpk_lt_u32 s{{[0-9]+}}, 0x800{{$}}
+define void @commute_br_scc_ugt_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+  %size = call i32 @llvm.amdgcn.groupstaticsize()
+  %cmp0 = icmp ugt i32 %size, %cond
+  br i1 %cmp0, label %endif, label %if
+
+if:
+  call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds)
+  br label %endif
+
+endif:
+  store volatile i32 1, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}commute_br_scc_uge_i32:
+; GCN: s_cmpk_le_u32 s{{[0-9]+}}, 0x800{{$}}
+define void @commute_br_scc_uge_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+  %size = call i32 @llvm.amdgcn.groupstaticsize()
+  %cmp0 = icmp uge i32 %size, %cond
+  br i1 %cmp0, label %endif, label %if
+
+if:
+  call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds)
+  br label %endif
+
+endif:
+  store volatile i32 1, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}commute_br_scc_ult_i32:
+; GCN: s_cmpk_gt_u32 s{{[0-9]+}}, 0x800{{$}}
+define void @commute_br_scc_ult_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+  %size = call i32 @llvm.amdgcn.groupstaticsize()
+  %cmp0 = icmp ult i32 %size, %cond
+  br i1 %cmp0, label %endif, label %if
+
+if:
+  call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds)
+  br label %endif
+
+endif:
+  store volatile i32 1, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}commute_br_scc_ule_i32:
+; GCN: s_cmpk_ge_u32 s{{[0-9]+}}, 0x800{{$}}
+define void @commute_br_scc_ule_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+  %size = call i32 @llvm.amdgcn.groupstaticsize()
+  %cmp0 = icmp ule i32 %size, %cond
+  br i1 %cmp0, label %endif, label %if
+
+if:
+  call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds)
+  br label %endif
+
+endif:
+  store volatile i32 1, i32 addrspace(1)* %out
+  ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }




More information about the llvm-commits mailing list