[PATCH] D123693: Transform tex2D to legal intrinsic on gfx90a.

Wed Apr 13 10:39:04 PDT 2022

Leonc created this revision.
Herald added subscribers: hsmhsm, foad, wenlei, kerbowa, hiraditya, nhaehnle, jvesely, arsenm.
Herald added a project: All.
Leonc requested review of this revision.
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.

Replace the `image_sample_lz_2d` intrinsic with `image_sample_2d` when the target is gfx90a.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D123693

Files:
  llvm/lib/Target/AMDGPU/SIISelLowering.cpp
  llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.gfx90a.ll


Index: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.gfx90a.ll
===================================================================

--- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.gfx90a.ll
+++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.gfx90a.ll
@@ -66,9 +66,19 @@
   ret <4 x float> %v
 }
 
+; GFX90A-LABEL: {{^}}sample_lz_2d:
+; GFX90A-NOT: s_wqm_b64
+; GFX90A:     image_sample v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}] dmask:0xf
+define amdgpu_ps <4 x float> @sample_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 15, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
 declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32)
 declare {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32)
 declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32)
 declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32)
 declare <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32)
 declare <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32)
+declare <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32)
Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -7208,6 +7208,13 @@
   unsigned IntrID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
   SDLoc DL(Op);
 
+  auto LowerDefaultCase = [&]() -> SDValue {
+    if (auto ImageDimIntr = AMDGPU::getImageDimIntrinsicInfo(IntrID)) {
+      return lowerImage(Op, ImageDimIntr, DAG, true);
+    }
+    return {};
+  };
+
   switch (IntrID) {
   case Intrinsic::amdgcn_ds_ordered_add:
   case Intrinsic::amdgcn_ds_ordered_swap: {
@@ -7826,13 +7833,31 @@
                                    M->getVTList(), Ops, M->getMemoryVT(),
                                    M->getMemOperand());
   }
+  case Intrinsic::amdgcn_image_sample_lz_2d: {
+    if (AMDGPU::isGFX90A(*Subtarget)) {
+      // Replace `image_sample_lz_2d` with `image_sample_2d`.
+      auto IntrinsicIDType = Op.getOperand(1u)->getValueType(0u);
+      auto NewIntrinsicID = DAG.getConstant(
+        Intrinsic::amdgcn_image_sample_2d, DL, IntrinsicIDType
+      );
+      auto const NumOps = Op.getNumOperands();
+      auto Ops = SmallVector<SDValue, 8u>();
+      Ops.push_back(Op.getOperand(0u)); // chain
+      Ops.push_back(NewIntrinsicID);
+      for (auto i = 2u; i < NumOps; ++i) {
+        Ops.push_back(Op.getOperand(i));
+      }
+      auto Opcode = Op.getOpcode();
+      auto VTs = Op.getNode()->getVTList();
+      auto MemNode = cast<MemSDNode>(Op);
+      return DAG.getMemIntrinsicNode(Opcode, DL, VTs, Ops, 
+                                     MemNode->getMemoryVT(),
+                                     MemNode->getMemOperand());
+    }
+    return LowerDefaultCase();
+  }
   default:
-
-    if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
-            AMDGPU::getImageDimIntrinsicInfo(IntrID))
-      return lowerImage(Op, ImageDimIntr, DAG, true);
-
-    return SDValue();
+    return LowerDefaultCase();
   }
 }
 


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D123693.422560.patch
Type: text/x-patch
Size: 3634 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20220413/e312f1c4/attachment.bin>