[clang] c714d03 - [AMDGPU] Expose __builtin_amdgcn_perm for v_perm_b32
Stanislav Mekhanoshin via cfe-commits
cfe-commits at lists.llvm.org
Thu May 6 16:25:23 PDT 2021
Author: Stanislav Mekhanoshin
Date: 2021-05-06T16:17:33-07:00
New Revision: c714d037857f9c8e3bbe32e22ec22279121c378d
URL: https://github.com/llvm/llvm-project/commit/c714d037857f9c8e3bbe32e22ec22279121c378d
DIFF: https://github.com/llvm/llvm-project/commit/c714d037857f9c8e3bbe32e22ec22279121c378d.diff
LOG: [AMDGPU] Expose __builtin_amdgcn_perm for v_perm_b32
Differential Revision: https://reviews.llvm.org/D102022
Added:
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.perm.ll
Modified:
clang/include/clang/Basic/BuiltinsAMDGPU.def
clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl
clang/test/SemaOpenCL/builtins-amdgcn-error-vi.cl
llvm/include/llvm/IR/IntrinsicsAMDGPU.td
llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Removed:
################################################################################
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 9677b1aadb518..7dcbf9a096961 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -182,6 +182,7 @@ TARGET_BUILTIN(__builtin_amdgcn_s_memrealtime, "LUi", "n", "s-memrealtime")
TARGET_BUILTIN(__builtin_amdgcn_mov_dpp, "iiIiIiIiIb", "nc", "dpp")
TARGET_BUILTIN(__builtin_amdgcn_update_dpp, "iiiIiIiIiIb", "nc", "dpp")
TARGET_BUILTIN(__builtin_amdgcn_s_dcache_wb, "v", "n", "gfx8-insts")
+TARGET_BUILTIN(__builtin_amdgcn_perm, "UiUiUiUi", "nc", "gfx8-insts")
//===----------------------------------------------------------------------===//
// GFX9+ only builtins.
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl
index 49faf069c8c68..be594585fad6d 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl
@@ -7,6 +7,7 @@
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
typedef unsigned long ulong;
+typedef unsigned int uint;
// CHECK-LABEL: @test_div_fixup_f16
// CHECK: call half @llvm.amdgcn.div.fixup.f16
@@ -137,3 +138,10 @@ void test_s_memtime(global ulong* out)
{
*out = __builtin_amdgcn_s_memtime();
}
+
+// CHECK-LABEL: @test_perm
+// CHECK: call i32 @llvm.amdgcn.perm(i32 %a, i32 %b, i32 %s)
+void test_perm(global uint* out, uint a, uint b, uint s)
+{
+ *out = __builtin_amdgcn_perm(a, b, s);
+}
diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-vi.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error-vi.cl
index 849c826c5b8cb..c23f4a452d83e 100644
--- a/clang/test/SemaOpenCL/builtins-amdgcn-error-vi.cl
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-vi.cl
@@ -2,7 +2,8 @@
// RUN: %clang_cc1 -triple amdgcn-- -target-cpu tahiti -verify -S -o - %s
// RUN: %clang_cc1 -triple amdgcn-- -target-cpu hawaii -verify -S -o - %s
-void test_vi_s_dcache_wb()
+void test_vi_builtins()
{
__builtin_amdgcn_s_dcache_wb(); // expected-error {{'__builtin_amdgcn_s_dcache_wb' needs target feature gfx8-insts}}
+ (void)__builtin_amdgcn_perm(1, 2, 3); // expected-error {{'__builtin_amdgcn_perm' needs target feature gfx8-insts}}
}
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 7b62b9de79b22..46a7aeb39c9a5 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -1716,6 +1716,12 @@ def int_amdgcn_ds_bpermute :
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, IntrConvergent, IntrWillReturn]>;
+// llvm.amdgcn.perm <src0> <src1> <selector>
+def int_amdgcn_perm :
+ GCCBuiltin<"__builtin_amdgcn_perm">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, IntrSpeculatable, IntrWillReturn]>;
+
//===----------------------------------------------------------------------===//
// GFX10 Intrinsics
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
index c0cb1781abe34..d63bd2e9eb2e0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -313,7 +313,7 @@ def AMDGPUfdot2_impl : SDNode<"AMDGPUISD::FDOT2",
SDTCisInt<4>]>,
[]>;
-def AMDGPUperm : SDNode<"AMDGPUISD::PERM", AMDGPUDTIntTernaryOp, []>;
+def AMDGPUperm_impl : SDNode<"AMDGPUISD::PERM", AMDGPUDTIntTernaryOp, []>;
// SI+ export
def AMDGPUExportOp : SDTypeProfile<0, 8, [
@@ -463,3 +463,7 @@ def AMDGPUfdot2 : PatFrags<(ops node:$src0, node:$src1, node:$src2, node:$clamp)
def AMDGPUdiv_fmas : PatFrags<(ops node:$src0, node:$src1, node:$src2, node:$vcc),
[(int_amdgcn_div_fmas node:$src0, node:$src1, node:$src2, node:$vcc),
(AMDGPUdiv_fmas_impl node:$src0, node:$src1, node:$src2, node:$vcc)]>;
+
+def AMDGPUperm : PatFrags<(ops node:$src0, node:$src1, node:$src2),
+ [(int_amdgcn_perm node:$src0, node:$src1, node:$src2),
+ (AMDGPUperm_impl node:$src0, node:$src1, node:$src2)]>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 482aef524f6f0..2126e7a27268e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -3949,6 +3949,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_cvt_pk_u8_f32:
case Intrinsic::amdgcn_alignbit:
case Intrinsic::amdgcn_alignbyte:
+ case Intrinsic::amdgcn_perm:
case Intrinsic::amdgcn_fdot2:
case Intrinsic::amdgcn_sdot2:
case Intrinsic::amdgcn_udot2:
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index e6eae914d54f3..5eca427052bcf 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -6695,6 +6695,9 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::amdgcn_alignbit:
return DAG.getNode(ISD::FSHR, DL, VT,
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+ case Intrinsic::amdgcn_perm:
+ return DAG.getNode(AMDGPUISD::PERM, DL, MVT::i32, Op.getOperand(1),
+ Op.getOperand(2), Op.getOperand(3));
case Intrinsic::amdgcn_reloc_constant: {
Module *M = const_cast<Module *>(MF.getFunction().getParent());
const MDNode *Metadata = cast<MDNodeSDNode>(Op.getOperand(1))->getMD();
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.perm.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.perm.ll
new file mode 100644
index 0000000000000..4d9ba39b8e0b8
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.perm.ll
@@ -0,0 +1,47 @@
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tonga -global-isel -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+declare i32 @llvm.amdgcn.perm(i32, i32, i32) #0
+
+; GCN-LABEL: {{^}}v_perm_b32_v_v_v:
+; GCN: v_perm_b32 v{{[0-9]+}}, v0, v1, v2
+define amdgpu_ps void @v_perm_b32_v_v_v(i32 %src1, i32 %src2, i32 %src3, i32 addrspace(1)* %out) #1 {
+ %val = call i32 @llvm.amdgcn.perm(i32 %src1, i32 %src2, i32 %src3) #0
+ store i32 %val, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}v_perm_b32_v_v_c:
+; GCN: v_perm_b32 v{{[0-9]+}}, v0, v1, {{[vs][0-9]+}}
+define amdgpu_ps void @v_perm_b32_v_v_c(i32 %src1, i32 %src2, i32 addrspace(1)* %out) #1 {
+ %val = call i32 @llvm.amdgcn.perm(i32 %src1, i32 %src2, i32 12345) #0
+ store i32 %val, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}v_perm_b32_s_v_c:
+; GCN: v_perm_b32 v{{[0-9]+}}, s0, v0, v{{[0-9]+}}
+define amdgpu_ps void @v_perm_b32_s_v_c(i32 inreg %src1, i32 %src2, i32 addrspace(1)* %out) #1 {
+ %val = call i32 @llvm.amdgcn.perm(i32 %src1, i32 %src2, i32 12345) #0
+ store i32 %val, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}v_perm_b32_s_s_c:
+; GCN: v_perm_b32 v{{[0-9]+}}, s0, v{{[0-9]+}}, v{{[0-9]+}}
+define amdgpu_ps void @v_perm_b32_s_s_c(i32 inreg %src1, i32 inreg %src2, i32 addrspace(1)* %out) #1 {
+ %val = call i32 @llvm.amdgcn.perm(i32 %src1, i32 %src2, i32 12345) #0
+ store i32 %val, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}v_perm_b32_v_s_i:
+; GCN: v_perm_b32 v{{[0-9]+}}, v0, s0, 1
+define amdgpu_ps void @v_perm_b32_v_s_i(i32 %src1, i32 inreg %src2, i32 addrspace(1)* %out) #1 {
+ %val = call i32 @llvm.amdgcn.perm(i32 %src1, i32 %src2, i32 1) #0
+ store i32 %val, i32 addrspace(1)* %out
+ ret void
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
More information about the cfe-commits
mailing list