[llvm] 77ce2e2 - [AMDGPU] Add Relocation Constant Support
Jakub Kuderski via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 30 10:49:38 PDT 2020
Author: Jakub Kuderski
Date: 2020-03-30T13:49:20-04:00
New Revision: 77ce2e21a87768370b97a027e5053b73e8f438a7
URL: https://github.com/llvm/llvm-project/commit/77ce2e21a87768370b97a027e5053b73e8f438a7
DIFF: https://github.com/llvm/llvm-project/commit/77ce2e21a87768370b97a027e5053b73e8f438a7.diff
LOG: [AMDGPU] Add Relocation Constant Support
Summary:
This change adds the amdgcn.reloc.constant intrinsic to the AMDGPU backend; a call to it compiles into a relocation entry in the resulting ELF.
The intrinsic takes a MetadataNode (String) as its only argument, which specifies the symbol name of the relocation entry.
`SelectionDAGBuilder::getValueImpl` is changed to allow metadata operands to be passed through to ISel.
Author: csyonghe <yonghe at google.com>
Reviewers: tpr, nhaehnle
Reviewed By: nhaehnle
Subscribers: arsenm, kzhuravl, jvesely, wdng, yaxunl, dstuttard, t-tye, hiraditya, kerbowa, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D76440
Added:
llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll
llvm/test/CodeGen/AMDGPU/amdgpu-reloc-const.ll
Modified:
llvm/include/llvm/IR/IntrinsicsAMDGPU.td
llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index c01db52b1622..1eb504bd6e80 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -1875,4 +1875,10 @@ def int_amdgcn_fdiv_fast : Intrinsic<
[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
[IntrNoMem, IntrSpeculatable]
>;
+
+// Represent a relocation constant.
+def int_amdgcn_reloc_constant : Intrinsic<
+ [llvm_i32_ty], [llvm_metadata_ty],
+ [IntrNoMem, IntrSpeculatable]
+>;
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index f2936fe93df4..042907d34907 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -1604,6 +1604,9 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V);
}
+ if (const MetadataAsValue *MD = dyn_cast<MetadataAsValue>(V)) {
+ return DAG.getMDNode(cast<MDNode>(MD->getMetadata()));
+ }
llvm_unreachable("Can't get register for value!");
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 269434d31e21..09e18cc0c2fd 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -1766,11 +1766,23 @@ bool AMDGPUDAGToDAGISel::SelectFlatAtomicSigned(SDNode *N,
bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
SDValue &Offset, bool &Imm) const {
-
- // FIXME: Handle non-constant offsets.
ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
- if (!C)
+ if (!C) {
+ if (ByteOffsetNode.getValueType().isScalarInteger() &&
+ ByteOffsetNode.getValueType().getSizeInBits() == 32) {
+ Offset = ByteOffsetNode;
+ Imm = false;
+ return true;
+ }
+ if (ByteOffsetNode.getOpcode() == ISD::ZERO_EXTEND) {
+ if (ByteOffsetNode.getOperand(0).getValueType().getSizeInBits() == 32) {
+ Offset = ByteOffsetNode.getOperand(0);
+ Imm = false;
+ return true;
+ }
+ }
return false;
+ }
SDLoc SL(ByteOffsetNode);
GCNSubtarget::Generation Gen = Subtarget->getGeneration();
@@ -1835,7 +1847,8 @@ bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
// wraparound, because s_load instructions perform the addition in 64 bits.
if ((Addr.getValueType() != MVT::i32 ||
Addr->getFlags().hasNoUnsignedWrap()) &&
- CurDAG->isBaseWithConstantOffset(Addr)) {
+ (CurDAG->isBaseWithConstantOffset(Addr) ||
+ Addr.getOpcode() == ISD::ADD)) {
SDValue N0 = Addr.getOperand(0);
SDValue N1 = Addr.getOperand(1);
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 4ff42769b470..f431c8e5256c 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -6022,6 +6022,16 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::amdgcn_alignbit:
return DAG.getNode(ISD::FSHR, DL, VT,
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+ case Intrinsic::amdgcn_reloc_constant: {
+ Module *M = const_cast<Module *>(MF.getFunction().getParent());
+ const MDNode *Metadata = cast<MDNodeSDNode>(Op.getOperand(1))->getMD();
+ auto SymbolName = cast<MDString>(Metadata->getOperand(0))->getString();
+ auto RelocSymbol = cast<GlobalVariable>(
+ M->getOrInsertGlobal(SymbolName, Type::getInt32Ty(M->getContext())));
+ SDValue GA = DAG.getTargetGlobalAddress(RelocSymbol, DL, MVT::i32, 0,
+ SIInstrInfo::MO_ABS32_LO);
+ return {DAG.getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, GA), 0};
+ }
default:
if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
AMDGPU::getImageDimIntrinsicInfo(IntrinsicID))
diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll b/llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll
new file mode 100644
index 000000000000..fde3ab8c6d4a
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll
@@ -0,0 +1,62 @@
+; Test that DAG->DAG ISel is able to pick up the S_LOAD_DWORDX4_SGPR instruction that fetches the offset
+; from a register.
+
+; RUN: llc -march=amdgcn -verify-machineinstrs -stop-after=amdgpu-isel -o - %s | FileCheck -check-prefix=GCN %s
+
+; GCN: %[[OFFSET:[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @DescriptorBuffer
+; GCN: %{{[0-9]+}}:sgpr_128 = S_LOAD_DWORDX4_SGPR killed %{{[0-9]+}}, killed %[[OFFSET]], 0, 0 :: (invariant load 16 from %ir.13, addrspace 4)
+
+define amdgpu_cs void @test_load_zext(i32 inreg %0, i32 inreg %1, i32 inreg %resNode0, i32 inreg %resNode1, <3 x i32> inreg %2, i32 inreg %3, <3 x i32> %4) local_unnamed_addr #2 {
+.entry:
+ %5 = call i64 @llvm.amdgcn.s.getpc() #3
+ %6 = bitcast i64 %5 to <2 x i32>
+ %7 = insertelement <2 x i32> %6, i32 %resNode0, i32 0
+ %8 = bitcast <2 x i32> %7 to i64
+ %9 = inttoptr i64 %8 to [4294967295 x i8] addrspace(4)*
+ %10 = call i32 @llvm.amdgcn.reloc.constant(metadata !4)
+ %11 = zext i32 %10 to i64
+ %12 = getelementptr [4294967295 x i8], [4294967295 x i8] addrspace(4)* %9, i64 0, i64 %11
+ %13 = bitcast i8 addrspace(4)* %12 to <4 x i32> addrspace(4)*, !amdgpu.uniform !5
+ %14 = load <4 x i32>, <4 x i32> addrspace(4)* %13, align 16, !invariant.load !5
+ %15 = call <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32> %14, i32 0, i32 0)
+ call void @llvm.amdgcn.raw.buffer.store.v4i32(<4 x i32> %15, <4 x i32> %14, i32 0, i32 0, i32 0)
+ ret void
+}
+
+declare <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32>, i32, i32, i1, i1) #0
+; Function Attrs: nounwind writeonly
+declare void @llvm.amdgcn.raw.buffer.store.v4i32(<4 x i32>, <4 x i32>, i32, i32, i32 immarg) #1
+
+; Function Attrs: nounwind readnone speculatable
+declare i32 @llvm.amdgcn.reloc.constant(metadata) #3
+
+; Function Attrs: nounwind readnone speculatable
+declare i64 @llvm.amdgcn.s.getpc() #3
+
+; Function Attrs: nounwind readnone
+declare <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32>, i32, i32 immarg) #1
+
+attributes #0 = { argmemonly nounwind willreturn }
+attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind "amdgpu-unroll-threshold"="700" }
+attributes #3 = { nounwind readnone speculatable }
+attributes #4 = { nounwind writeonly }
+
+!llpc.compute.mode = !{!0}
+!llpc.options = !{!1}
+!llpc.options.CS = !{!2}
+!llpc.user.data.nodes = !{!3, !4, !5, !6}
+!amdgpu.pal.metadata.msgpack = !{!7}
+
+!0 = !{i32 2, i32 3, i32 1}
+!1 = !{i32 245227952, i32 996822128, i32 2024708198, i32 497230408}
+!2 = !{i32 1381820427, i32 1742110173, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 64}
+!3 = !{!"DescriptorTableVaPtr", i32 0, i32 1, i32 1}
+!4 = !{!"DescriptorBuffer", i32 4, i32 8, i32 0, i32 0}
+!5 = !{!"DescriptorTableVaPtr", i32 1, i32 1, i32 1}
+!6 = !{!"DescriptorBuffer", i32 4, i32 8, i32 1, i32 0}
+!7 = !{!"\82\B0amdpal.pipelines\91\88\A4.api\A6Vulkan\B0.hardware_stages\81\A3.cs\82\AB.sgpr_limith\AB.vgpr_limit\CD\01\00\B7.internal_pipeline_hash\92\CF;jLp\0E\9D\E1\B0\CF\1D\A3\22Hx\AE\98f\AA.registers\88\CD.\07\02\CD.\08\03\CD.\09\01\CD.\12\CE\00,\00\00\CD.\13\CD\0F\88\CD.@\CE\10\00\00\00\CD.B\00\CD.C\01\A8.shaders\81\A8.compute\82\B0.api_shader_hash\92\CFg\D6}\DDR\\\E8\0B\00\B1.hardware_mapping\91\A3.cs\B0.spill_threshold\CE\FF\FF\FF\FF\A5.type\A2Cs\B0.user_data_limit\02\AEamdpal.version\92\02\03"}
+!8 = !{i32 5}
+!9 = !{!"doff_0_0_b"}
+!10 = !{}
+!11 = !{!"doff_1_0_b"}
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-reloc-const.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-reloc-const.ll
new file mode 100644
index 000000000000..569f0101e3be
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-reloc-const.ll
@@ -0,0 +1,30 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -filetype=obj -o %t.o < %s && llvm-readobj -relocations %t.o | FileCheck --check-prefix=ELF %s
+; GCN-LABEL: {{^}}ps_main:
+; GCN: v_mov_b32_{{.*}} v[[relocreg:[0-9]+]], doff_0_0_b@abs32@lo
+; GCN-NEXT: exp {{.*}} v[[relocreg]], {{.*}}
+; GCN-NEXT: s_endpgm
+; GCN-NEXT: .Lfunc_end
+
+; ELF: Relocations [
+; ELF-NEXT: Section (3) .rel.text {
+; ELF-NEXT: 0x{{[0-9]+}} R_AMDGPU_ABS32 doff_0_0_b {{.*}}
+
+define amdgpu_ps void @ps_main(i32 %arg, i32 inreg %arg1, i32 inreg %arg2) local_unnamed_addr #0 {
+ %rc = call i32 @llvm.amdgcn.reloc.constant(metadata !1)
+ %rcf = bitcast i32 %rc to float
+ call void @llvm.amdgcn.exp.f32(i32 immarg 40, i32 immarg 15, float %rcf, float undef, float undef, float undef, i1 immarg false, i1 immarg false) #0
+ ret void
+}
+
+; Function Attrs: inaccessiblememonly nounwind
+declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) #1
+
+; Function Attrs: nounwind readnone speculatable
+declare i32 @llvm.amdgcn.reloc.constant(metadata) #2
+
+attributes #0 = { nounwind }
+attributes #1 = { inaccessiblememonly nounwind }
+attributes #2 = { nounwind readnone speculatable }
+
+!1 = !{!"doff_0_0_b"}
More information about the llvm-commits
mailing list