[clang] c714d03 - [AMDGPU] Expose __builtin_amdgcn_perm for v_perm_b32

Stanislav Mekhanoshin via cfe-commits cfe-commits at lists.llvm.org
Thu May 6 16:25:23 PDT 2021


Author: Stanislav Mekhanoshin
Date: 2021-05-06T16:17:33-07:00
New Revision: c714d037857f9c8e3bbe32e22ec22279121c378d

URL: https://github.com/llvm/llvm-project/commit/c714d037857f9c8e3bbe32e22ec22279121c378d
DIFF: https://github.com/llvm/llvm-project/commit/c714d037857f9c8e3bbe32e22ec22279121c378d.diff

LOG: [AMDGPU] Expose __builtin_amdgcn_perm for v_perm_b32

Differential Revision: https://reviews.llvm.org/D102022

Added: 
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.perm.ll

Modified: 
    clang/include/clang/Basic/BuiltinsAMDGPU.def
    clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl
    clang/test/SemaOpenCL/builtins-amdgcn-error-vi.cl
    llvm/include/llvm/IR/IntrinsicsAMDGPU.td
    llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
    llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
    llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 9677b1aadb518..7dcbf9a096961 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -182,6 +182,7 @@ TARGET_BUILTIN(__builtin_amdgcn_s_memrealtime, "LUi", "n", "s-memrealtime")
 TARGET_BUILTIN(__builtin_amdgcn_mov_dpp, "iiIiIiIiIb", "nc", "dpp")
 TARGET_BUILTIN(__builtin_amdgcn_update_dpp, "iiiIiIiIiIb", "nc", "dpp")
 TARGET_BUILTIN(__builtin_amdgcn_s_dcache_wb, "v", "n", "gfx8-insts")
+TARGET_BUILTIN(__builtin_amdgcn_perm, "UiUiUiUi", "nc", "gfx8-insts")
 
 //===----------------------------------------------------------------------===//
 // GFX9+ only builtins.

diff  --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl
index 49faf069c8c68..be594585fad6d 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl
@@ -7,6 +7,7 @@
 #pragma OPENCL EXTENSION cl_khr_fp16 : enable
 
 typedef unsigned long ulong;
+typedef unsigned int  uint;
 
 // CHECK-LABEL: @test_div_fixup_f16
 // CHECK: call half @llvm.amdgcn.div.fixup.f16
@@ -137,3 +138,10 @@ void test_s_memtime(global ulong* out)
 {
   *out = __builtin_amdgcn_s_memtime();
 }
+
+// CHECK-LABEL: @test_perm
+// CHECK: call i32 @llvm.amdgcn.perm(i32 %a, i32 %b, i32 %s)
+void test_perm(global uint* out, uint a, uint b, uint s)
+{
+  *out = __builtin_amdgcn_perm(a, b, s);
+}

diff  --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-vi.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error-vi.cl
index 849c826c5b8cb..c23f4a452d83e 100644
--- a/clang/test/SemaOpenCL/builtins-amdgcn-error-vi.cl
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-vi.cl
@@ -2,7 +2,8 @@
 // RUN: %clang_cc1 -triple amdgcn-- -target-cpu tahiti -verify -S -o - %s
 // RUN: %clang_cc1 -triple amdgcn-- -target-cpu hawaii -verify -S -o - %s
 
-void test_vi_s_dcache_wb()
+void test_vi_builtins()
 {
   __builtin_amdgcn_s_dcache_wb(); // expected-error {{'__builtin_amdgcn_s_dcache_wb' needs target feature gfx8-insts}}
+  (void)__builtin_amdgcn_perm(1, 2, 3); // expected-error {{'__builtin_amdgcn_perm' needs target feature gfx8-insts}}
 }

diff  --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 7b62b9de79b22..46a7aeb39c9a5 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -1716,6 +1716,12 @@ def int_amdgcn_ds_bpermute :
   Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
      [IntrNoMem, IntrConvergent, IntrWillReturn]>;
 
+// llvm.amdgcn.perm <src0> <src1> <selector>
+def int_amdgcn_perm :
+  GCCBuiltin<"__builtin_amdgcn_perm">,
+  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+     [IntrNoMem, IntrSpeculatable, IntrWillReturn]>;
+
 //===----------------------------------------------------------------------===//
 // GFX10 Intrinsics
 //===----------------------------------------------------------------------===//

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
index c0cb1781abe34..d63bd2e9eb2e0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -313,7 +313,7 @@ def AMDGPUfdot2_impl : SDNode<"AMDGPUISD::FDOT2",
                                        SDTCisInt<4>]>,
                   []>;
 
-def AMDGPUperm : SDNode<"AMDGPUISD::PERM", AMDGPUDTIntTernaryOp, []>;
+def AMDGPUperm_impl : SDNode<"AMDGPUISD::PERM", AMDGPUDTIntTernaryOp, []>;
 
 // SI+ export
 def AMDGPUExportOp : SDTypeProfile<0, 8, [
@@ -463,3 +463,7 @@ def AMDGPUfdot2 : PatFrags<(ops node:$src0, node:$src1, node:$src2, node:$clamp)
 def AMDGPUdiv_fmas : PatFrags<(ops node:$src0, node:$src1, node:$src2, node:$vcc),
   [(int_amdgcn_div_fmas node:$src0, node:$src1, node:$src2, node:$vcc),
    (AMDGPUdiv_fmas_impl node:$src0, node:$src1, node:$src2, node:$vcc)]>;
+
+def AMDGPUperm : PatFrags<(ops node:$src0, node:$src1, node:$src2),
+  [(int_amdgcn_perm node:$src0, node:$src1, node:$src2),
+   (AMDGPUperm_impl node:$src0, node:$src1, node:$src2)]>;

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 482aef524f6f0..2126e7a27268e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -3949,6 +3949,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
     case Intrinsic::amdgcn_cvt_pk_u8_f32:
     case Intrinsic::amdgcn_alignbit:
     case Intrinsic::amdgcn_alignbyte:
+    case Intrinsic::amdgcn_perm:
     case Intrinsic::amdgcn_fdot2:
     case Intrinsic::amdgcn_sdot2:
     case Intrinsic::amdgcn_udot2:

diff  --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index e6eae914d54f3..5eca427052bcf 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -6695,6 +6695,9 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
   case Intrinsic::amdgcn_alignbit:
     return DAG.getNode(ISD::FSHR, DL, VT,
                        Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+  case Intrinsic::amdgcn_perm:
+    return DAG.getNode(AMDGPUISD::PERM, DL, MVT::i32, Op.getOperand(1),
+                       Op.getOperand(2), Op.getOperand(3));
   case Intrinsic::amdgcn_reloc_constant: {
     Module *M = const_cast<Module *>(MF.getFunction().getParent());
     const MDNode *Metadata = cast<MDNodeSDNode>(Op.getOperand(1))->getMD();

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.perm.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.perm.ll
new file mode 100644
index 0000000000000..4d9ba39b8e0b8
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.perm.ll
@@ -0,0 +1,47 @@
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tonga -global-isel -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+declare i32 @llvm.amdgcn.perm(i32, i32, i32) #0
+
+; GCN-LABEL: {{^}}v_perm_b32_v_v_v:
+; GCN: v_perm_b32 v{{[0-9]+}}, v0, v1, v2
+define amdgpu_ps void @v_perm_b32_v_v_v(i32 %src1, i32 %src2, i32 %src3, i32 addrspace(1)* %out) #1 {
+  %val = call i32 @llvm.amdgcn.perm(i32 %src1, i32 %src2, i32 %src3) #0
+  store i32 %val, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_perm_b32_v_v_c:
+; GCN: v_perm_b32 v{{[0-9]+}}, v0, v1, {{[vs][0-9]+}}
+define amdgpu_ps void @v_perm_b32_v_v_c(i32 %src1, i32 %src2, i32 addrspace(1)* %out) #1 {
+  %val = call i32 @llvm.amdgcn.perm(i32 %src1, i32 %src2, i32 12345) #0
+  store i32 %val, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_perm_b32_s_v_c:
+; GCN: v_perm_b32 v{{[0-9]+}}, s0, v0, v{{[0-9]+}}
+define amdgpu_ps void @v_perm_b32_s_v_c(i32 inreg %src1, i32 %src2, i32 addrspace(1)* %out) #1 {
+  %val = call i32 @llvm.amdgcn.perm(i32 %src1, i32 %src2, i32 12345) #0
+  store i32 %val, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_perm_b32_s_s_c:
+; GCN: v_perm_b32 v{{[0-9]+}}, s0, v{{[0-9]+}}, v{{[0-9]+}}
+define amdgpu_ps void @v_perm_b32_s_s_c(i32 inreg %src1, i32 inreg %src2, i32 addrspace(1)* %out) #1 {
+  %val = call i32 @llvm.amdgcn.perm(i32 %src1, i32 %src2, i32 12345) #0
+  store i32 %val, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_perm_b32_v_s_i:
+; GCN: v_perm_b32 v{{[0-9]+}}, v0, s0, 1
+define amdgpu_ps void @v_perm_b32_v_s_i(i32 %src1, i32 inreg %src2, i32 addrspace(1)* %out) #1 {
+  %val = call i32 @llvm.amdgcn.perm(i32 %src1, i32 %src2, i32 1) #0
+  store i32 %val, i32 addrspace(1)* %out
+  ret void
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }


        


More information about the cfe-commits mailing list