[PATCH] D82652: AMDGPU/GlobalISel: Select icmp intrinsic
Petar Avramovic via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 30 02:08:44 PDT 2020
This revision was automatically updated to reflect the committed changes.
Closed by commit rGd7173826331e: AMDGPU/GlobalISel: Select icmp intrinsic (authored by Petar.Avramovic).
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D82652/new/
https://reviews.llvm.org/D82652
Files:
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.icmp.ll
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.icmp.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.icmp.ll
@@ -0,0 +1,32 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -global-isel-abort=1 -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+define amdgpu_ps void @test_intr_icmp_eq_i64(i64 addrspace(1)* %out, i32 %src) #0 {
+; GCN-LABEL: test_intr_icmp_eq_i64:
+; GCN: ; %bb.0:
+; GCN-NEXT: v_cmp_eq_u32_e64 s[0:1], 0x64, v2
+; GCN-NEXT: v_mov_b32_e32 v3, s1
+; GCN-NEXT: v_mov_b32_e32 v2, s0
+; GCN-NEXT: global_store_dwordx2 v[0:1], v[2:3], off
+; GCN-NEXT: s_endpgm
+ %result = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %src, i32 100, i32 32)
+ store i64 %result, i64 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_ps void @test_intr_icmp_ne_i32(i32 addrspace(1)* %out, i32 %src) #1 {
+; GCN-LABEL: test_intr_icmp_ne_i32:
+; GCN: ; %bb.0:
+; GCN-NEXT: v_cmp_ne_u32_e64 s0, 0x64, v2
+; GCN-NEXT: ; implicit-def: $vcc_hi
+; GCN-NEXT: v_mov_b32_e32 v2, s0
+; GCN-NEXT: global_store_dword v[0:1], v2, off
+; GCN-NEXT: s_endpgm
+ %result = call i32 @llvm.amdgcn.icmp.i32.i32(i32 %src, i32 100, i32 33)
+ store i32 %result, i32 addrspace(1)* %out
+ ret void
+}
+declare i64 @llvm.amdgcn.icmp.i64.i32(i32, i32, i32)
+declare i32 @llvm.amdgcn.icmp.i32.i32(i32, i32, i32)
+attributes #0 = { "target-features"="+wavefrontsize64" }
+attributes #1 = { "target-features"="+wavefrontsize32" }
Index: llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -106,6 +106,7 @@
bool selectInterpP1F16(MachineInstr &MI) const;
bool selectDivScale(MachineInstr &MI) const;
+ bool selectIntrinsicIcmp(MachineInstr &MI) const;
bool selectG_INTRINSIC(MachineInstr &I) const;
bool selectEndCfIntrinsic(MachineInstr &MI) const;
Index: llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -889,6 +889,8 @@
return constrainCopyLikeIntrin(I, AMDGPU::WWM);
case Intrinsic::amdgcn_div_scale:
return selectDivScale(I);
+ case Intrinsic::amdgcn_icmp:
+ return selectIntrinsicIcmp(I);
default:
return selectImpl(I, *CoverageInfo);
}
@@ -1009,6 +1011,34 @@
return Ret;
}
+bool AMDGPUInstructionSelector::selectIntrinsicIcmp(MachineInstr &I) const {
+ Register Dst = I.getOperand(0).getReg();
+ if (isVCC(Dst, *MRI))
+ return false;
+
+ if (MRI->getType(Dst).getSizeInBits() != STI.getWavefrontSize())
+ return false;
+
+ MachineBasicBlock *BB = I.getParent();
+ const DebugLoc &DL = I.getDebugLoc();
+ Register SrcReg = I.getOperand(2).getReg();
+ unsigned Size = RBI.getSizeInBits(SrcReg, *MRI, TRI);
+ auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(4).getImm());
+
+ int Opcode = getV_CMPOpcode(Pred, Size);
+ if (Opcode == -1)
+ return false;
+
+ MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode), Dst)
+ .add(I.getOperand(2))
+ .add(I.getOperand(3));
+ RBI.constrainGenericRegister(ICmp->getOperand(0).getReg(), *TRI.getBoolRC(),
+ *MRI);
+ bool Ret = constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI);
+ I.eraseFromParent();
+ return Ret;
+}
+
bool AMDGPUInstructionSelector::selectEndCfIntrinsic(MachineInstr &MI) const {
// FIXME: Manually selecting to avoid dealing with the SReg_1 trick
// SelectionDAG uses for wave32 vs wave64.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D82652.274375.patch
Type: text/x-patch
Size: 3902 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20200630/baa6b9bb/attachment.bin>
More information about the llvm-commits
mailing list