[PATCH] D123693: Transform tex2D to legal intrinsic on gfx90a.
Leon Clark via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 13 10:39:04 PDT 2022
Leonc created this revision.
Herald added subscribers: hsmhsm, foad, wenlei, kerbowa, hiraditya, nhaehnle, jvesely, arsenm.
Herald added a project: All.
Leonc requested review of this revision.
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.
Replace the `image_sample_lz_2d` intrinsic with `image_sample_2d` when the target is gfx90a.
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D123693
Files:
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.gfx90a.ll
Index: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.gfx90a.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.gfx90a.ll
+++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.gfx90a.ll
@@ -66,9 +66,19 @@
ret <4 x float> %v
}
+; GFX90A-LABEL: {{^}}sample_lz_2d:
+; GFX90A-NOT: s_wqm_b64
+; GFX90A: image_sample v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}] dmask:0xf
+define amdgpu_ps <4 x float> @sample_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 15, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32)
declare {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32)
declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32)
declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32)
declare <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32)
declare <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32)
+declare <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32)
Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -7208,6 +7208,13 @@
unsigned IntrID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
SDLoc DL(Op);
+ auto LowerDefaultCase = [&]() -> SDValue {
+ if (auto ImageDimIntr = AMDGPU::getImageDimIntrinsicInfo(IntrID)) {
+ return lowerImage(Op, ImageDimIntr, DAG, true);
+ }
+ return {};
+ };
+
switch (IntrID) {
case Intrinsic::amdgcn_ds_ordered_add:
case Intrinsic::amdgcn_ds_ordered_swap: {
@@ -7826,13 +7833,31 @@
M->getVTList(), Ops, M->getMemoryVT(),
M->getMemOperand());
}
+ case Intrinsic::amdgcn_image_sample_lz_2d: {
+ if (AMDGPU::isGFX90A(*Subtarget)) {
+ // Replace `image_sample_lz_2d` with `image_sample_2d`.
+ auto IntrinsicIDType = Op.getOperand(1u)->getValueType(0u);
+ auto NewIntrinsicID = DAG.getConstant(
+ Intrinsic::amdgcn_image_sample_2d, DL, IntrinsicIDType
+ );
+ auto const NumOps = Op.getNumOperands();
+ auto Ops = SmallVector<SDValue, 8u>();
+ Ops.push_back(Op.getOperand(0u)); // chain
+ Ops.push_back(NewIntrinsicID);
+ for (auto i = 2u; i < NumOps; ++i) {
+ Ops.push_back(Op.getOperand(i));
+ }
+ auto Opcode = Op.getOpcode();
+ auto VTs = Op.getNode()->getVTList();
+ auto MemNode = cast<MemSDNode>(Op);
+ return DAG.getMemIntrinsicNode(Opcode, DL, VTs, Ops,
+ MemNode->getMemoryVT(),
+ MemNode->getMemOperand());
+ }
+ return LowerDefaultCase();
+ }
default:
-
- if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
- AMDGPU::getImageDimIntrinsicInfo(IntrID))
- return lowerImage(Op, ImageDimIntr, DAG, true);
-
- return SDValue();
+ return LowerDefaultCase();
}
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D123693.422560.patch
Type: text/x-patch
Size: 3634 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20220413/e312f1c4/attachment.bin>
More information about the llvm-commits
mailing list