[llvm] [AMDGPU] Support image_bvh8_intersect_ray instruction and intrinsic. (PR #130041)
Mariusz Sikora via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 19 03:59:16 PDT 2025
https://github.com/mariusz-sikora-at-amd updated https://github.com/llvm/llvm-project/pull/130041
>From 8e318f191368c22c03943bcb4e7b7bd81e258486 Mon Sep 17 00:00:00 2001
From: Ivan Kosarev <ivan.kosarev at amd.com>
Date: Mon, 3 Mar 2025 05:34:48 -0500
Subject: [PATCH 1/5] [AMDGPU] Support image_bvh8_intersect_ray instruction and
intrinsic.
---
llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 11 +++
.../AMDGPU/AMDGPUInstructionSelector.cpp | 1 +
.../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 21 +++--
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h | 3 +-
.../Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 18 ++--
llvm/lib/Target/AMDGPU/MIMGInstructions.td | 32 ++++---
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 16 ++--
llvm/lib/Target/AMDGPU/SIInstructions.td | 8 ++
.../AMDGPU/llvm.amdgcn.bvh8_intersect_ray.ll | 87 +++++++++++++++++++
llvm/test/MC/AMDGPU/gfx12_asm_vimage.s | 3 +
llvm/test/MC/AMDGPU/gfx12_asm_vimage_alias.s | 3 +
.../Disassembler/AMDGPU/gfx12_dasm_vimage.txt | 3 +
12 files changed, 171 insertions(+), 35 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.bvh8_intersect_ray.ll
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 307e4b8a01e5c..ad2ddc8e78939 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -2824,6 +2824,17 @@ def int_amdgcn_image_bvh_dual_intersect_ray :
llvm_v3f32_ty, llvm_v2i32_ty, llvm_v4i32_ty],
[IntrReadMem, IntrWillReturn, IntrNoCallback, IntrNoFree]>;
+// <vdata>, <ray_origin>, <ray_dir>
+// llvm.amdgcn.image.bvh8.intersect.ray <node_ptr>, <ray_extent>,
+// <instance_mask>, <ray_origin>,
+// <ray_dir>, <offset>,
+// <texture_descr>
+def int_amdgcn_image_bvh8_intersect_ray :
+ Intrinsic<[llvm_v10i32_ty, llvm_v3f32_ty, llvm_v3f32_ty],
+ [llvm_i64_ty, llvm_float_ty, llvm_i8_ty, llvm_v3f32_ty,
+ llvm_v3f32_ty, llvm_i32_ty, llvm_v4i32_ty],
+ [IntrReadMem, IntrWillReturn]>;
+
// llvm.amdgcn.permlane16.var <old> <src0> <src1> <fi> <bound_control>
def int_amdgcn_permlane16_var : ClangBuiltin<"__builtin_amdgcn_permlane16_var">,
Intrinsic<[llvm_i32_ty],
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 745621fc1e089..d1b1cb788b7d2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -4118,6 +4118,7 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
}
case AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY:
case AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY:
+ case AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY:
return selectBVHIntersectRayIntrinsic(I);
case AMDGPU::G_SBFX:
case AMDGPU::G_UBFX:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index efa042b033628..bb5c2e3f1efe5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -7198,8 +7198,8 @@ bool AMDGPULegalizerInfo::legalizeBVHIntersectRayIntrinsic(
return true;
}
-bool AMDGPULegalizerInfo::legalizeBVHDualIntrinsic(MachineInstr &MI,
- MachineIRBuilder &B) const {
+bool AMDGPULegalizerInfo::legalizeBVHDualOrBVH8IntersectRayIntrinsic(
+ MachineInstr &MI, MachineIRBuilder &B) const {
const LLT S32 = LLT::scalar(32);
const LLT V2S32 = LLT::fixed_vector(2, 32);
@@ -7222,17 +7222,21 @@ bool AMDGPULegalizerInfo::legalizeBVHDualIntrinsic(MachineInstr &MI,
return false;
}
+ bool IsBVH8 = cast<GIntrinsic>(MI).getIntrinsicID() ==
+ Intrinsic::amdgcn_image_bvh8_intersect_ray;
const unsigned NumVDataDwords = 10;
- const unsigned NumVAddrDwords = 12;
- int Opcode = AMDGPU::getMIMGOpcode(AMDGPU::IMAGE_BVH_DUAL_INTERSECT_RAY,
- AMDGPU::MIMGEncGfx12, NumVDataDwords,
- NumVAddrDwords);
+ const unsigned NumVAddrDwords = IsBVH8 ? 11 : 12;
+ int Opcode = AMDGPU::getMIMGOpcode(
+ IsBVH8 ? AMDGPU::IMAGE_BVH8_INTERSECT_RAY
+ : AMDGPU::IMAGE_BVH_DUAL_INTERSECT_RAY,
+ AMDGPU::MIMGEncGfx12, NumVDataDwords, NumVAddrDwords);
assert(Opcode != -1);
auto RayExtentInstanceMaskVec = B.buildMergeLikeInstr(
V2S32, {RayExtent, B.buildAnyExt(S32, InstanceMask)});
- B.buildInstr(AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY)
+ B.buildInstr(IsBVH8 ? AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY
+ : AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY)
.addDef(DstReg)
.addDef(DstOrigin)
.addDef(DstDir)
@@ -7598,7 +7602,8 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
case Intrinsic::amdgcn_image_bvh_intersect_ray:
return legalizeBVHIntersectRayIntrinsic(MI, B);
case Intrinsic::amdgcn_image_bvh_dual_intersect_ray:
- return legalizeBVHDualIntrinsic(MI, B);
+ case Intrinsic::amdgcn_image_bvh8_intersect_ray:
+ return legalizeBVHDualOrBVH8IntersectRayIntrinsic(MI, B);
case Intrinsic::amdgcn_swmmac_f16_16x16x32_f16:
case Intrinsic::amdgcn_swmmac_bf16_16x16x32_bf16:
case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf16:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
index aba1f55330913..1f4e02b0d600a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -208,7 +208,8 @@ class AMDGPULegalizerInfo final : public LegalizerInfo {
bool legalizeBVHIntersectRayIntrinsic(MachineInstr &MI,
MachineIRBuilder &B) const;
- bool legalizeBVHDualIntrinsic(MachineInstr &MI, MachineIRBuilder &B) const;
+ bool legalizeBVHDualOrBVH8IntersectRayIntrinsic(MachineInstr &MI,
+ MachineIRBuilder &B) const;
bool legalizeLaneOp(LegalizerHelper &Helper, MachineInstr &MI,
Intrinsic::ID IID) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index acdf6a932a0bf..7df1e634b21ba 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -3240,9 +3240,12 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
return;
}
case AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY:
+ case AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY:
case AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY: {
- bool IsDual = MI.getOpcode() == AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY;
- unsigned NumMods = IsDual ? 0 : 1; // Has A16 modifier
+ bool IsDualOrBVH8 =
+ MI.getOpcode() == AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY ||
+ MI.getOpcode() == AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY;
+ unsigned NumMods = IsDualOrBVH8 ? 0 : 1; // Has A16 modifier
unsigned LastRegOpIdx = MI.getNumExplicitOperands() - 1 - NumMods;
applyDefaultMapping(OpdMapper);
executeInWaterfallLoop(B, MI, {LastRegOpIdx});
@@ -5036,13 +5039,16 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
return getImageMapping(MRI, MI, RSrcIntrin->RsrcArg);
}
case AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY:
+ case AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY:
case AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY: {
- bool IsDual = MI.getOpcode() == AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY;
- unsigned NumMods = IsDual ? 0 : 1; // Has A16 modifier
+ bool IsDualOrBVH8 =
+ MI.getOpcode() == AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY ||
+ MI.getOpcode() == AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY;
+ unsigned NumMods = IsDualOrBVH8 ? 0 : 1; // Has A16 modifier
unsigned LastRegOpIdx = MI.getNumExplicitOperands() - 1 - NumMods;
unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
- if (IsDual) {
+ if (IsDualOrBVH8) {
OpdsMapping[1] = AMDGPU::getValueMapping(
AMDGPU::VGPRRegBankID,
MRI.getType(MI.getOperand(1).getReg()).getSizeInBits());
@@ -5060,7 +5066,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
} else {
// NSA form
- unsigned FirstSrcOpIdx = IsDual ? 4 : 2;
+ unsigned FirstSrcOpIdx = IsDualOrBVH8 ? 4 : 2;
for (unsigned I = FirstSrcOpIdx; I < LastRegOpIdx; ++I) {
unsigned Size = MRI.getType(MI.getOperand(I).getReg()).getSizeInBits();
OpdsMapping[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
index 63af4b2e351fb..2b9bc2b89a825 100644
--- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -1509,18 +1509,19 @@ multiclass MIMG_Gather <mimgopc op, AMDGPUSampleVariant sample, bit wqm = 0,
multiclass MIMG_Gather_WQM <mimgopc op, AMDGPUSampleVariant sample>
: MIMG_Gather<op, sample, 1>;
-class MIMG_IntersectRay_Helper<bit Is64, bit IsA16, bit isDual> {
- int num_addrs = !if(Is64, !if(IsA16, 9, 12), !if(IsA16, 8, 11));
+class MIMG_IntersectRay_Helper<bit Is64, bit IsA16, bit isDual, bit isBVH8> {
+ int num_addrs = !if(isBVH8, 11, !if(Is64, !if(IsA16, 9, 12), !if(IsA16, 8, 11)));
RegisterClass RegClass = MIMGAddrSize<num_addrs, 0>.RegClass;
int VAddrDwords = !srl(RegClass.Size, 5);
int GFX11PlusNSAAddrs = !if(IsA16, 4, 5);
RegisterClass node_ptr_type = !if(Is64, VReg_64, VGPR_32);
list<RegisterClass> GFX11PlusAddrTypes =
- !if(isDual, [VReg_64, VReg_64, VReg_96, VReg_96, VReg_64],
+ !if(isBVH8, [VReg_64, VReg_64, VReg_96, VReg_96, VGPR_32],
+ !if(isDual, [VReg_64, VReg_64, VReg_96, VReg_96, VReg_64],
!if(IsA16,
[node_ptr_type, VGPR_32, VReg_96, VReg_96],
- [node_ptr_type, VGPR_32, VReg_96, VReg_96, VReg_96]));
+ [node_ptr_type, VGPR_32, VReg_96, VReg_96, VReg_96])));
}
class MIMG_IntersectRay_gfx10<mimgopc op, string opcode, RegisterClass AddrRC>
@@ -1554,26 +1555,26 @@ class MIMG_IntersectRay_nsa_gfx11<mimgopc op, string opcode, int num_addrs,
}
class VIMAGE_IntersectRay_gfx12<mimgopc op, string opcode, int num_addrs,
- bit isDual,
+ bit isDual, bit isBVH8,
list<RegisterClass> addr_types>
- : VIMAGE_gfx12<op.GFX12, !if(isDual,
+ : VIMAGE_gfx12<op.GFX12, !if(!or(isDual, isBVH8),
(outs VReg_320:$vdata, VReg_96:$ray_origin_out,
VReg_96:$ray_dir_out),
(outs VReg_128:$vdata)),
num_addrs, "GFX12", addr_types> {
- let Constraints = !if(isDual,
+ let Constraints = !if(!or(isDual, isBVH8),
"$ray_origin_out = $vaddr2, $ray_dir_out = $vaddr3", "");
let InOperandList = !con(nsah.AddrIns, (ins SReg_128_XNULL:$rsrc),
- !if(isDual, (ins), (ins A16:$a16)));
+ !if(!or(isDual, isBVH8), (ins), (ins A16:$a16)));
let AsmString = opcode#" $vdata, "#nsah.AddrAsm#", $rsrc"#
- !if(isDual, "", "$a16");
- let SchedRW = !if(isDual,
+ !if(!or(isDual, isBVH8), "", "$a16");
+ let SchedRW = !if(!or(isDual, isBVH8),
[WriteVMEM, WriteVMEM, WriteVMEM], [WriteVMEM]);
}
multiclass MIMG_IntersectRay<mimgopc op, string opcode, bit Is64, bit IsA16,
- bit isDual> {
- defvar info = MIMG_IntersectRay_Helper<Is64, IsA16, isDual>;
+ bit isDual, bit isBVH8 = 0> {
+ defvar info = MIMG_IntersectRay_Helper<Is64, IsA16, isDual, isBVH8>;
def "" : MIMGBaseOpcode {
let BVH = 1;
let A16 = IsA16;
@@ -1611,8 +1612,9 @@ multiclass MIMG_IntersectRay<mimgopc op, string opcode, bit Is64, bit IsA16,
}
}
def _gfx12 : VIMAGE_IntersectRay_gfx12<op, opcode, info.GFX11PlusNSAAddrs,
- isDual, info.GFX11PlusAddrTypes> {
- let VDataDwords = !if(isDual, 10, 4);
+ isDual, isBVH8,
+ info.GFX11PlusAddrTypes> {
+ let VDataDwords = !if(!or(isDual, isBVH8), 10, 4);
let VAddrDwords = info.num_addrs;
}
}
@@ -1791,11 +1793,13 @@ defm IMAGE_BVH64_INTERSECT_RAY_a16 : MIMG_IntersectRay<mimgopc<0x1a, 0x1a, 0xe7>
} // End OtherPredicates = [HasImageInsts, HasGFX10_AEncoding]
defm IMAGE_BVH_DUAL_INTERSECT_RAY : MIMG_IntersectRay<mimgopc<0x80, MIMG.NOP, MIMG.NOP>, "image_bvh_dual_intersect_ray", 1, 0, 1>;
+defm IMAGE_BVH8_INTERSECT_RAY : MIMG_IntersectRay<mimgopc<0x81, MIMG.NOP, MIMG.NOP>, "image_bvh8_intersect_ray", 1, 0, 0, 1>;
let SubtargetPredicate = isGFX12Plus in {
def : AMDGPUMnemonicAlias<"bvh_intersect_ray", "image_bvh_intersect_ray">;
def : AMDGPUMnemonicAlias<"bvh64_intersect_ray", "image_bvh64_intersect_ray">;
def : AMDGPUMnemonicAlias<"bvh_dual_intersect_ray", "image_bvh_dual_intersect_ray">;
+ def : AMDGPUMnemonicAlias<"bvh8_intersect_ray", "image_bvh8_intersect_ray">;
}
} // End let OtherPredicates = [HasImageInsts]
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 09f011410e0a4..1fa5afc6f98aa 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1387,7 +1387,8 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
return true;
}
case Intrinsic::amdgcn_image_bvh_dual_intersect_ray:
- case Intrinsic::amdgcn_image_bvh_intersect_ray: {
+ case Intrinsic::amdgcn_image_bvh_intersect_ray:
+ case Intrinsic::amdgcn_image_bvh8_intersect_ray: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT =
MVT::getVT(IntrID == Intrinsic::amdgcn_image_bvh_intersect_ray
@@ -9443,7 +9444,8 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Op->getVTList(), Ops, VT,
M->getMemOperand());
}
- case Intrinsic::amdgcn_image_bvh_dual_intersect_ray: {
+ case Intrinsic::amdgcn_image_bvh_dual_intersect_ray:
+ case Intrinsic::amdgcn_image_bvh8_intersect_ray: {
MemSDNode *M = cast<MemSDNode>(Op);
SDValue NodePtr = M->getOperand(2);
SDValue RayExtent = M->getOperand(3);
@@ -9461,11 +9463,13 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
return SDValue();
}
+ bool IsBVH8 = IntrID == Intrinsic::amdgcn_image_bvh8_intersect_ray;
const unsigned NumVDataDwords = 10;
- const unsigned NumVAddrDwords = 12;
- int Opcode = AMDGPU::getMIMGOpcode(AMDGPU::IMAGE_BVH_DUAL_INTERSECT_RAY,
- AMDGPU::MIMGEncGfx12, NumVDataDwords,
- NumVAddrDwords);
+ const unsigned NumVAddrDwords = IsBVH8 ? 11 : 12;
+ int Opcode = AMDGPU::getMIMGOpcode(
+ IsBVH8 ? AMDGPU::IMAGE_BVH8_INTERSECT_RAY
+ : AMDGPU::IMAGE_BVH_DUAL_INTERSECT_RAY,
+ AMDGPU::MIMGEncGfx12, NumVDataDwords, NumVAddrDwords);
assert(Opcode != -1);
SmallVector<SDValue, 7> Ops;
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index b2315bc80f0a4..47a78bbcd7aee 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -4376,6 +4376,14 @@ def G_AMDGPU_BVH_DUAL_INTERSECT_RAY : AMDGPUGenericInstruction {
let mayStore = 0;
}
+def G_AMDGPU_BVH8_INTERSECT_RAY : AMDGPUGenericInstruction {
+ let OutOperandList = (outs type0:$dst, type1:$ray_origin, type1:$ray_dir);
+ let InOperandList = (ins unknown:$opcode, variable_ops);
+ let hasSideEffects = 0;
+ let mayLoad = 1;
+ let mayStore = 0;
+}
+
// Generic instruction for SI_CALL, so we can select the register bank and insert a waterfall loop
// if necessary.
def G_SI_CALL : AMDGPUGenericInstruction {
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.bvh8_intersect_ray.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.bvh8_intersect_ray.ll
new file mode 100644
index 0000000000000..ff65d5d96cb2c
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.bvh8_intersect_ray.ll
@@ -0,0 +1,87 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12-SDAG %s
+; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12-GISEL %s
+
+declare {<10 x i32>, <3 x float>, <3 x float>} @llvm.amdgcn.image.bvh8.intersect.ray(i64, float, i8, <3 x float>, <3 x float>, i32, <4 x i32>)
+
+define amdgpu_ps <10 x float> @image_bvh8_intersect_ray(i64 %node_ptr, float %ray_extent, float %ray_origin_x, float %ray_origin_y, float %ray_origin_z, float %ray_dir_x, float %ray_dir_y, float %ray_dir_z, i32 %offset, <4 x i32> inreg %tdescr, ptr addrspace(1) %origin, ptr addrspace(1) %dir) {
+; GFX12-SDAG-LABEL: image_bvh8_intersect_ray:
+; GFX12-SDAG: ; %bb.0: ; %main_body
+; GFX12-SDAG-NEXT: v_dual_mov_b32 v21, v8 :: v_dual_mov_b32 v20, v7
+; GFX12-SDAG-NEXT: v_dual_mov_b32 v19, v6 :: v_dual_mov_b32 v18, v5
+; GFX12-SDAG-NEXT: v_dual_mov_b32 v17, v4 :: v_dual_mov_b32 v16, v3
+; GFX12-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX12-SDAG-NEXT: image_bvh8_intersect_ray v[0:9], [v[0:1], v[2:3], v[16:18], v[19:21], v9], s[0:3]
+; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-SDAG-NEXT: global_store_b96 v[10:11], v[16:18], off
+; GFX12-SDAG-NEXT: global_store_b96 v[12:13], v[19:21], off
+; GFX12-SDAG-NEXT: ; return to shader part epilog
+;
+; GFX12-GISEL-LABEL: image_bvh8_intersect_ray:
+; GFX12-GISEL: ; %bb.0: ; %main_body
+; GFX12-GISEL-NEXT: v_dual_mov_b32 v14, v3 :: v_dual_mov_b32 v15, v4
+; GFX12-GISEL-NEXT: v_dual_mov_b32 v16, v5 :: v_dual_mov_b32 v17, v6
+; GFX12-GISEL-NEXT: v_dual_mov_b32 v18, v7 :: v_dual_mov_b32 v19, v8
+; GFX12-GISEL-NEXT: v_mov_b32_e32 v3, 0
+; GFX12-GISEL-NEXT: image_bvh8_intersect_ray v[0:9], [v[0:1], v[2:3], v[14:16], v[17:19], v9], s[0:3]
+; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GISEL-NEXT: global_store_b96 v[10:11], v[14:16], off
+; GFX12-GISEL-NEXT: global_store_b96 v[12:13], v[17:19], off
+; GFX12-GISEL-NEXT: ; return to shader part epilog
+main_body:
+ %ray_origin0 = insertelement <3 x float> poison, float %ray_origin_x, i32 0
+ %ray_origin1 = insertelement <3 x float> %ray_origin0, float %ray_origin_y, i32 1
+ %ray_origin = insertelement <3 x float> %ray_origin1, float %ray_origin_z, i32 2
+ %ray_dir0 = insertelement <3 x float> poison, float %ray_dir_x, i32 0
+ %ray_dir1 = insertelement <3 x float> %ray_dir0, float %ray_dir_y, i32 1
+ %ray_dir = insertelement <3 x float> %ray_dir1, float %ray_dir_z, i32 2
+ %v = call {<10 x i32>, <3 x float>, <3 x float>} @llvm.amdgcn.image.bvh8.intersect.ray(i64 %node_ptr, float %ray_extent, i8 0, <3 x float> %ray_origin, <3 x float> %ray_dir, i32 %offset, <4 x i32> %tdescr)
+ %a = extractvalue {<10 x i32>, <3 x float>, <3 x float>} %v, 0
+ %r = bitcast <10 x i32> %a to <10 x float>
+ %o = extractvalue {<10 x i32>, <3 x float>, <3 x float>} %v, 1
+ store <3 x float> %o, ptr addrspace(1) %origin
+ %d = extractvalue {<10 x i32>, <3 x float>, <3 x float>} %v, 2
+ store <3 x float> %d, ptr addrspace(1) %dir
+ ret <10 x float> %r
+}
+
+define amdgpu_ps <10 x float> @image_bvh8_intersect_ray_1(i64 %node_ptr, float %ray_extent, float %ray_origin_x, float %ray_origin_y, float %ray_origin_z, float %ray_dir_x, float %ray_dir_y, float %ray_dir_z, i32 %offset, <4 x i32> inreg %tdescr, ptr addrspace(1) %origin, ptr addrspace(1) %dir) {
+; GFX12-SDAG-LABEL: image_bvh8_intersect_ray_1:
+; GFX12-SDAG: ; %bb.0: ; %main_body
+; GFX12-SDAG-NEXT: v_dual_mov_b32 v21, v8 :: v_dual_mov_b32 v20, v7
+; GFX12-SDAG-NEXT: v_dual_mov_b32 v19, v6 :: v_dual_mov_b32 v18, v5
+; GFX12-SDAG-NEXT: v_dual_mov_b32 v17, v4 :: v_dual_mov_b32 v16, v3
+; GFX12-SDAG-NEXT: v_mov_b32_e32 v3, 1
+; GFX12-SDAG-NEXT: image_bvh8_intersect_ray v[0:9], [v[0:1], v[2:3], v[16:18], v[19:21], v9], s[0:3]
+; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-SDAG-NEXT: global_store_b96 v[10:11], v[16:18], off
+; GFX12-SDAG-NEXT: global_store_b96 v[12:13], v[19:21], off
+; GFX12-SDAG-NEXT: ; return to shader part epilog
+;
+; GFX12-GISEL-LABEL: image_bvh8_intersect_ray_1:
+; GFX12-GISEL: ; %bb.0: ; %main_body
+; GFX12-GISEL-NEXT: v_dual_mov_b32 v14, v3 :: v_dual_mov_b32 v15, v4
+; GFX12-GISEL-NEXT: v_dual_mov_b32 v16, v5 :: v_dual_mov_b32 v17, v6
+; GFX12-GISEL-NEXT: v_dual_mov_b32 v18, v7 :: v_dual_mov_b32 v19, v8
+; GFX12-GISEL-NEXT: v_mov_b32_e32 v3, 1
+; GFX12-GISEL-NEXT: image_bvh8_intersect_ray v[0:9], [v[0:1], v[2:3], v[14:16], v[17:19], v9], s[0:3]
+; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GISEL-NEXT: global_store_b96 v[10:11], v[14:16], off
+; GFX12-GISEL-NEXT: global_store_b96 v[12:13], v[17:19], off
+; GFX12-GISEL-NEXT: ; return to shader part epilog
+main_body:
+ %ray_origin0 = insertelement <3 x float> poison, float %ray_origin_x, i32 0
+ %ray_origin1 = insertelement <3 x float> %ray_origin0, float %ray_origin_y, i32 1
+ %ray_origin = insertelement <3 x float> %ray_origin1, float %ray_origin_z, i32 2
+ %ray_dir0 = insertelement <3 x float> poison, float %ray_dir_x, i32 0
+ %ray_dir1 = insertelement <3 x float> %ray_dir0, float %ray_dir_y, i32 1
+ %ray_dir = insertelement <3 x float> %ray_dir1, float %ray_dir_z, i32 2
+ %v = call {<10 x i32>, <3 x float>, <3 x float>} @llvm.amdgcn.image.bvh8.intersect.ray(i64 %node_ptr, float %ray_extent, i8 1, <3 x float> %ray_origin, <3 x float> %ray_dir, i32 %offset, <4 x i32> %tdescr)
+ %a = extractvalue {<10 x i32>, <3 x float>, <3 x float>} %v, 0
+ %r = bitcast <10 x i32> %a to <10 x float>
+ %o = extractvalue {<10 x i32>, <3 x float>, <3 x float>} %v, 1
+ store <3 x float> %o, ptr addrspace(1) %origin
+ %d = extractvalue {<10 x i32>, <3 x float>, <3 x float>} %v, 2
+ store <3 x float> %d, ptr addrspace(1) %dir
+ ret <10 x float> %r
+}
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vimage.s b/llvm/test/MC/AMDGPU/gfx12_asm_vimage.s
index 3ca8f4308a0ee..c99123bbe1ee0 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vimage.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vimage.s
@@ -1069,6 +1069,9 @@ image_bvh64_intersect_ray v[4:7], [v[9:10], v11, v[12:14], v[15:17]], s[4:7] a16
image_bvh_dual_intersect_ray v[0:9], [v[0:1], v[11:12], v[3:5], v[6:8], v[9:10]], s[0:3]
// GFX12: encoding: [0x10,0x00,0xe0,0xd3,0x00,0x00,0x00,0x09,0x00,0x0b,0x03,0x06]
+image_bvh8_intersect_ray v[0:9], [v[0:1], v[11:12], v[3:5], v[6:8], v9], s[0:3]
+// GFX12: encoding: [0x10,0x40,0xe0,0xd3,0x00,0x00,0x00,0x09,0x00,0x0b,0x03,0x06]
+
image_get_resinfo v4, v32, s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D
// GFX12: encoding: [0x00,0xc0,0x45,0xd0,0x04,0xc0,0x00,0x00,0x20,0x00,0x00,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vimage_alias.s b/llvm/test/MC/AMDGPU/gfx12_asm_vimage_alias.s
index 0148ff6cabc93..f693fe3d22d26 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vimage_alias.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vimage_alias.s
@@ -44,3 +44,6 @@ bvh64_intersect_ray v[4:7], [v[9:10], v11, v[12:14], v[15:17], v[18:20]], s[4:7]
bvh_dual_intersect_ray v[0:9], [v[0:1], v[11:12], v[3:5], v[6:8], v[9:10]], s[0:3]
// GFX12: image_bvh_dual_intersect_ray v[0:9], [v[0:1], v[11:12], v[3:5], v[6:8], v[9:10]], s[0:3] ; encoding: [0x10,0x00,0xe0,0xd3,0x00,0x00,0x00,0x09,0x00,0x0b,0x03,0x06]
+
+bvh8_intersect_ray v[0:9], [v[0:1], v[11:12], v[3:5], v[6:8], v9], s[0:3]
+// GFX12: image_bvh8_intersect_ray v[0:9], [v[0:1], v[11:12], v[3:5], v[6:8], v9], s[0:3] ; encoding: [0x10,0x40,0xe0,0xd3,0x00,0x00,0x00,0x09,0x00,0x0b,0x03,0x06]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vimage.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vimage.txt
index afb7c3c24db17..387bdf5a6018f 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vimage.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vimage.txt
@@ -1069,6 +1069,9 @@
# GFX12: image_bvh_dual_intersect_ray v[0:9], [v[0:1], v[11:12], v[3:5], v[6:8], v[9:10]], s[0:3] ; encoding: [0x10,0x00,0xe0,0xd3,0x00,0x00,0x00,0x09,0x00,0x0b,0x03,0x06]
0x10,0x00,0xe0,0xd3,0x00,0x00,0x00,0x09,0x00,0x0b,0x03,0x06
+# GFX12: image_bvh8_intersect_ray v[0:9], [v[0:1], v[11:12], v[3:5], v[6:8], v9], s[0:3] ; encoding: [0x10,0x40,0xe0,0xd3,0x00,0x00,0x00,0x09,0x00,0x0b,0x03,0x06]
+0x10,0x40,0xe0,0xd3,0x00,0x00,0x00,0x09,0x00,0x0b,0x03,0x06
+
# GFX12: image_get_resinfo v4, v32, s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0xc0,0x45,0xd0,0x04,0xc0,0x00,0x00,0x20,0x00,0x00,0x00]
0x00,0xc0,0x45,0xd0,0x04,0xc0,0x00,0x00,0x20,0x00,0x00,0x00
>From 711d8f755ba601cffa2ac875499847f614ded2d3 Mon Sep 17 00:00:00 2001
From: Mariusz Sikora <mariusz.sikora at amd.com>
Date: Sun, 16 Mar 2025 15:59:16 -0400
Subject: [PATCH 2/5] Update Intrinsic properties
---
llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index ad2ddc8e78939..f53016f62abbe 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -2833,7 +2833,7 @@ def int_amdgcn_image_bvh8_intersect_ray :
Intrinsic<[llvm_v10i32_ty, llvm_v3f32_ty, llvm_v3f32_ty],
[llvm_i64_ty, llvm_float_ty, llvm_i8_ty, llvm_v3f32_ty,
llvm_v3f32_ty, llvm_i32_ty, llvm_v4i32_ty],
- [IntrReadMem, IntrWillReturn]>;
+ [IntrReadMem, IntrWillReturn, IntrNoCallback, IntrNoFree]>;
// llvm.amdgcn.permlane16.var <old> <src0> <src1> <fi> <bound_control>
def int_amdgcn_permlane16_var : ClangBuiltin<"__builtin_amdgcn_permlane16_var">,
>From 36c8d1ca5c4a894af88d27ca82a1a81b79640437 Mon Sep 17 00:00:00 2001
From: Mariusz Sikora <mariusz.sikora at amd.com>
Date: Sun, 16 Mar 2025 16:00:27 -0400
Subject: [PATCH 3/5] Use !cond for addr types
---
llvm/lib/Target/AMDGPU/MIMGInstructions.td | 9 ++++-----
1 file changed, 4 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
index 2b9bc2b89a825..9064af6807682 100644
--- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -1517,11 +1517,10 @@ class MIMG_IntersectRay_Helper<bit Is64, bit IsA16, bit isDual, bit isBVH8> {
int GFX11PlusNSAAddrs = !if(IsA16, 4, 5);
RegisterClass node_ptr_type = !if(Is64, VReg_64, VGPR_32);
list<RegisterClass> GFX11PlusAddrTypes =
- !if(isBVH8, [VReg_64, VReg_64, VReg_96, VReg_96, VGPR_32],
- !if(isDual, [VReg_64, VReg_64, VReg_96, VReg_96, VReg_64],
- !if(IsA16,
- [node_ptr_type, VGPR_32, VReg_96, VReg_96],
- [node_ptr_type, VGPR_32, VReg_96, VReg_96, VReg_96])));
+ !cond(!eq(isBVH8, 1) : [node_ptr_type, VReg_64, VReg_96, VReg_96, VGPR_32],
+ !eq(isDual, 1) : [node_ptr_type, VReg_64, VReg_96, VReg_96, VReg_64],
+ !eq(IsA16, 0) : [node_ptr_type, VGPR_32, VReg_96, VReg_96, VReg_96],
+ !eq(IsA16, 1) : [node_ptr_type, VGPR_32, VReg_96, VReg_96]);
}
class MIMG_IntersectRay_gfx10<mimgopc op, string opcode, RegisterClass AddrRC>
>From ac8fb52e016e784d2b1f5ef928be34b2c3d9ab77 Mon Sep 17 00:00:00 2001
From: Mariusz Sikora <mariusz.sikora at amd.com>
Date: Tue, 18 Mar 2025 06:52:40 -0400
Subject: [PATCH 4/5] Update AddrType cond
---
llvm/lib/Target/AMDGPU/MIMGInstructions.td | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
index 9064af6807682..fd19ebf8d069f 100644
--- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -1517,10 +1517,10 @@ class MIMG_IntersectRay_Helper<bit Is64, bit IsA16, bit isDual, bit isBVH8> {
int GFX11PlusNSAAddrs = !if(IsA16, 4, 5);
RegisterClass node_ptr_type = !if(Is64, VReg_64, VGPR_32);
list<RegisterClass> GFX11PlusAddrTypes =
- !cond(!eq(isBVH8, 1) : [node_ptr_type, VReg_64, VReg_96, VReg_96, VGPR_32],
- !eq(isDual, 1) : [node_ptr_type, VReg_64, VReg_96, VReg_96, VReg_64],
- !eq(IsA16, 0) : [node_ptr_type, VGPR_32, VReg_96, VReg_96, VReg_96],
- !eq(IsA16, 1) : [node_ptr_type, VGPR_32, VReg_96, VReg_96]);
+ !cond(isBVH8 : [node_ptr_type, VReg_64, VReg_96, VReg_96, VGPR_32],
+ isDual : [node_ptr_type, VReg_64, VReg_96, VReg_96, VReg_64],
+ IsA16 : [node_ptr_type, VGPR_32, VReg_96, VReg_96],
+ true : [node_ptr_type, VGPR_32, VReg_96, VReg_96, VReg_96]);
}
class MIMG_IntersectRay_gfx10<mimgopc op, string opcode, RegisterClass AddrRC>
>From 6534bbe86af481cafbf4d50796becedc323b6195 Mon Sep 17 00:00:00 2001
From: Mariusz Sikora <mariusz.sikora at amd.com>
Date: Wed, 19 Mar 2025 05:36:45 -0400
Subject: [PATCH 5/5] Subtarget feature for bvh dual and bvh8
---
llvm/lib/Target/AMDGPU/AMDGPU.td | 12 ++++++------
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 2 +-
llvm/lib/Target/AMDGPU/GCNSubtarget.h | 4 ++--
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 2 +-
4 files changed, 10 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 0cac6d9674b5a..84619dd656f35 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1094,10 +1094,10 @@ def FeaturePrngInst : SubtargetFeature<"prng-inst",
"Has v_prng_b32 instruction"
>;
-def FeatureBVHDualInst : SubtargetFeature<"bvh-dual-inst",
- "HasBVHDualInst",
+def FeatureBVHDualAndBVH8Insts : SubtargetFeature<"bvh-dual-bvh-8-insts",
+ "HasBVHDualAndBVH8Insts",
"true",
- "Has image_bvh_dual_intersect_ray instruction"
+ "Has image_bvh_dual_intersect_ray and image_bvh8_intersect_ray instructions"
>;
//===------------------------------------------------------------===//
@@ -1857,7 +1857,7 @@ def FeatureISAVersion12 : FeatureSet<
FeatureMaxHardClauseLength32,
Feature1_5xVGPRs,
FeatureMemoryAtomicFAddF32DenormalSupport,
- FeatureBVHDualInst
+ FeatureBVHDualAndBVH8Insts
]>;
def FeatureISAVersion12_Generic: FeatureSet<
@@ -2513,8 +2513,8 @@ def HasBitOp3Insts : Predicate<"Subtarget->hasBitOp3Insts()">,
def HasPrngInst : Predicate<"Subtarget->hasPrngInst()">,
AssemblerPredicate<(all_of FeaturePrngInst)>;
-def HasBVHDualInst : Predicate<"Subtarget->hasBVHDualInst()">,
- AssemblerPredicate<(all_of FeatureBVHDualInst)>;
+def HasBVHDualAndBVH8Insts : Predicate<"Subtarget->hasBVHDualAndBVH8Insts()">,
+ AssemblerPredicate<(all_of FeatureBVHDualAndBVH8Insts)>;
def HasFP8ConversionScaleInsts : Predicate<"Subtarget->hasFP8ConversionScaleInsts()">,
AssemblerPredicate<(all_of FeatureFP8ConversionScaleInsts)>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index bb5c2e3f1efe5..668c70780ee90 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -7214,7 +7214,7 @@ bool AMDGPULegalizerInfo::legalizeBVHDualOrBVH8IntersectRayIntrinsic(
Register Offsets = MI.getOperand(9).getReg();
Register TDescr = MI.getOperand(10).getReg();
- if (!ST.hasBVHDualInst()) {
+ if (!ST.hasBVHDualAndBVH8Insts()) {
DiagnosticInfoUnsupported BadIntrin(B.getMF().getFunction(),
"intrinsic not supported on subtarget",
MI.getDebugLoc());
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index f3d6f95568dde..7384278d81cc1 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -229,7 +229,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool HasRestrictedSOffset = false;
bool HasBitOp3Insts = false;
bool HasPrngInst = false;
- bool HasBVHDualInst = false;
+ bool HasBVHDualAndBVH8Insts = false;
bool HasPermlane16Swap = false;
bool HasPermlane32Swap = false;
bool HasVcmpxPermlaneHazard = false;
@@ -1366,7 +1366,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool hasPrngInst() const { return HasPrngInst; }
- bool hasBVHDualInst() const { return HasBVHDualInst; }
+ bool hasBVHDualAndBVH8Insts() const { return HasBVHDualAndBVH8Insts; }
/// Return the maximum number of waves per SIMD for kernels using \p SGPRs
/// SGPRs
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 1fa5afc6f98aa..b5f5865269e0a 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -9458,7 +9458,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
assert(NodePtr.getValueType() == MVT::i64);
assert(RayDir.getValueType() == MVT::v3f32);
- if (!Subtarget->hasBVHDualInst()) {
+ if (!Subtarget->hasBVHDualAndBVH8Insts()) {
emitRemovedIntrinsicError(DAG, DL, Op.getValueType());
return SDValue();
}
More information about the llvm-commits mailing list