[llvm] d717382 - AMDGPU/GlobalISel: Select icmp intrinsic

Tue Jun 30 01:58:15 PDT 2020

Author: Petar Avramovic
Date: 2020-06-30T10:57:41+02:00
New Revision: d7173826331eb2c263ecee607e86cebcf1c08ad7

URL: https://github.com/llvm/llvm-project/commit/d7173826331eb2c263ecee607e86cebcf1c08ad7
DIFF: https://github.com/llvm/llvm-project/commit/d7173826331eb2c263ecee607e86cebcf1c08ad7.diff

LOG: AMDGPU/GlobalISel: Select icmp intrinsic

Select into corresponding V_CMP instruction based on CmpInst predicate,
stored as immediate, in last operand.

Differential Revision: https://reviews.llvm.org/D82652

Added: 
    llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.icmp.ll

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
    llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 6380d6b91d61..c3d5e78964c8 100644

--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -889,6 +889,8 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {
     return constrainCopyLikeIntrin(I, AMDGPU::WWM);
   case Intrinsic::amdgcn_div_scale:
     return selectDivScale(I);
+  case Intrinsic::amdgcn_icmp:
+    return selectIntrinsicIcmp(I);
   default:
     return selectImpl(I, *CoverageInfo);
   }
@@ -1009,6 +1011,34 @@ bool AMDGPUInstructionSelector::selectG_ICMP(MachineInstr &I) const {
   return Ret;
 }
 
+bool AMDGPUInstructionSelector::selectIntrinsicIcmp(MachineInstr &I) const {
+  Register Dst = I.getOperand(0).getReg();
+  if (isVCC(Dst, *MRI))
+    return false;
+
+  if (MRI->getType(Dst).getSizeInBits() != STI.getWavefrontSize())
+    return false;
+
+  MachineBasicBlock *BB = I.getParent();
+  const DebugLoc &DL = I.getDebugLoc();
+  Register SrcReg = I.getOperand(2).getReg();
+  unsigned Size = RBI.getSizeInBits(SrcReg, *MRI, TRI);
+  auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(4).getImm());
+
+  int Opcode = getV_CMPOpcode(Pred, Size);
+  if (Opcode == -1)
+    return false;
+
+  MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode), Dst)
+                           .add(I.getOperand(2))
+                           .add(I.getOperand(3));
+  RBI.constrainGenericRegister(ICmp->getOperand(0).getReg(), *TRI.getBoolRC(),
+                               *MRI);
+  bool Ret = constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI);
+  I.eraseFromParent();
+  return Ret;
+}
+
 bool AMDGPUInstructionSelector::selectEndCfIntrinsic(MachineInstr &MI) const {
   // FIXME: Manually selecting to avoid dealiing with the SReg_1 trick
   // SelectionDAG uses for wave32 vs wave64.

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index 0ac6788c69b8..f8a8b5db4b55 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -106,6 +106,7 @@ class AMDGPUInstructionSelector : public InstructionSelector {
 
   bool selectInterpP1F16(MachineInstr &MI) const;
   bool selectDivScale(MachineInstr &MI) const;
+  bool selectIntrinsicIcmp(MachineInstr &MI) const;
   bool selectG_INTRINSIC(MachineInstr &I) const;
 
   bool selectEndCfIntrinsic(MachineInstr &MI) const;

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.icmp.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.icmp.ll
new file mode 100644
index 000000000000..dc04f15967a7
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.icmp.ll
@@ -0,0 +1,32 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -global-isel-abort=1 -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+define amdgpu_ps void @test_intr_icmp_eq_i64(i64 addrspace(1)* %out, i32 %src) #0 {
+; GCN-LABEL: test_intr_icmp_eq_i64:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    v_cmp_eq_u32_e64 s[0:1], 0x64, v2
+; GCN-NEXT:    v_mov_b32_e32 v3, s1
+; GCN-NEXT:    v_mov_b32_e32 v2, s0
+; GCN-NEXT:    global_store_dwordx2 v[0:1], v[2:3], off
+; GCN-NEXT:    s_endpgm
+  %result = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %src, i32 100, i32 32)
+  store i64 %result, i64 addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_ps void @test_intr_icmp_ne_i32(i32 addrspace(1)* %out, i32 %src) #1 {
+; GCN-LABEL: test_intr_icmp_ne_i32:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    v_cmp_ne_u32_e64 s0, 0x64, v2
+; GCN-NEXT:    ; implicit-def: $vcc_hi
+; GCN-NEXT:    v_mov_b32_e32 v2, s0
+; GCN-NEXT:    global_store_dword v[0:1], v2, off
+; GCN-NEXT:    s_endpgm
+  %result = call i32 @llvm.amdgcn.icmp.i32.i32(i32 %src, i32 100, i32 33)
+  store i32 %result, i32 addrspace(1)* %out
+  ret void
+}
+declare i64 @llvm.amdgcn.icmp.i64.i32(i32, i32, i32)
+declare i32 @llvm.amdgcn.icmp.i32.i32(i32, i32, i32)
+attributes #0 = { "target-features"="+wavefrontsize64" }
+attributes #1 = { "target-features"="+wavefrontsize32" }