[llvm] 96e1032 - [AMDGPU] Add extended-image-insts to RemoveIncompatibleFunctions

via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 8 06:16:02 PDT 2023


Author: pvanhout
Date: 2023-08-08T15:15:57+02:00
New Revision: 96e1032a5e59ea3161cc96c63e83c109e5519655

URL: https://github.com/llvm/llvm-project/commit/96e1032a5e59ea3161cc96c63e83c109e5519655
DIFF: https://github.com/llvm/llvm-project/commit/96e1032a5e59ea3161cc96c63e83c109e5519655.diff

LOG: [AMDGPU] Add extended-image-insts to RemoveIncompatibleFunctions

Otherwise device libs still have issues at O0 (in OpenCL-CTS)

Depends on D156972 as well. They're unrelated fixes but both are needed to fix the issue.

Fixes SWDEV-402331

Reviewed By: #amdgpu, arsenm

Differential Revision: https://reviews.llvm.org/D156973

Added: 
    llvm/test/CodeGen/AMDGPU/remove-incompatible-extended-image-insts.ll

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPURemoveIncompatibleFunctions.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPURemoveIncompatibleFunctions.cpp b/llvm/lib/Target/AMDGPU/AMDGPURemoveIncompatibleFunctions.cpp
index 580352fb8cf443..9ae3ce53353e5f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURemoveIncompatibleFunctions.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURemoveIncompatibleFunctions.cpp
@@ -90,13 +90,21 @@ const SubtargetSubTypeKV *getGPUInfo(const GCNSubtarget &ST,
 }
 
 constexpr unsigned FeaturesToCheck[] = {
-    AMDGPU::FeatureGFX11Insts, AMDGPU::FeatureGFX10Insts,
-    AMDGPU::FeatureGFX9Insts,  AMDGPU::FeatureGFX8Insts,
-    AMDGPU::FeatureDPP,        AMDGPU::Feature16BitInsts,
-    AMDGPU::FeatureDot1Insts,  AMDGPU::FeatureDot2Insts,
-    AMDGPU::FeatureDot3Insts,  AMDGPU::FeatureDot4Insts,
-    AMDGPU::FeatureDot5Insts,  AMDGPU::FeatureDot6Insts,
-    AMDGPU::FeatureDot7Insts,  AMDGPU::FeatureDot8Insts,
+    AMDGPU::FeatureGFX11Insts,
+    AMDGPU::FeatureGFX10Insts,
+    AMDGPU::FeatureGFX9Insts,
+    AMDGPU::FeatureGFX8Insts,
+    AMDGPU::FeatureDPP,
+    AMDGPU::Feature16BitInsts,
+    AMDGPU::FeatureDot1Insts,
+    AMDGPU::FeatureDot2Insts,
+    AMDGPU::FeatureDot3Insts,
+    AMDGPU::FeatureDot4Insts,
+    AMDGPU::FeatureDot5Insts,
+    AMDGPU::FeatureDot6Insts,
+    AMDGPU::FeatureDot7Insts,
+    AMDGPU::FeatureDot8Insts,
+    AMDGPU::FeatureExtendedImageInsts,
 };
 
 FeatureBitset expandImpliedFeatures(const FeatureBitset &Features) {

diff  --git a/llvm/test/CodeGen/AMDGPU/remove-incompatible-extended-image-insts.ll b/llvm/test/CodeGen/AMDGPU/remove-incompatible-extended-image-insts.ll
new file mode 100644
index 00000000000000..b24717ef396760
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/remove-incompatible-extended-image-insts.ll
@@ -0,0 +1,47 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=amdgcn -mcpu=gfx908 -stop-after=amdgpu-remove-incompatible-functions\
+; RUN:   -pass-remarks=amdgpu-remove-incompatible-functions %s -o - 2>%t | FileCheck -check-prefix=EXTIMG %s
+; RUN: FileCheck -allow-empty --check-prefix=WARN-EXTIMG %s < %t
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s
+
+; RUN: llc -march=amdgcn -mcpu=gfx90a -stop-after=amdgpu-remove-incompatible-functions\
+; RUN:   -pass-remarks=amdgpu-remove-incompatible-functions %s -o - 2>%t | FileCheck -check-prefix=NOEXTIMG %s
+; RUN: FileCheck --check-prefix=WARN-NOEXTIMG %s < %t
+; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s
+
+; Note: This test checks the IR, but also has a run line to codegen the file just to check we
+; do not crash when trying to select those functions.
+
+; WARN-EXTIMG-NOT: removing function 'needs_extimg':
+; WARN-NOEXTIMG:   removing function 'needs_extimg': +extended-image-insts is not supported on the current target
+
+; EXTIMG:       @GVRefs {{.*}} [ptr @needs_extimg]
+; NOEXTIMG:     @GVRefs {{.*}} zeroinitializer
+@GVRefs = internal global [1 x ptr] [
+  ptr @needs_extimg
+]
+
+; EXTIMG:         @ConstantExpr = internal global i64 ptrtoint (ptr @needs_extimg to i64)
+; NOEXTIMG:       @ConstantExpr = internal global i64 0
+
+@ConstantExpr = internal global i64 ptrtoint (ptr @needs_extimg to i64)
+
+; EXTIMG:         define <4 x float> @needs_extimg
+; NOEXTIMG-NOT:   define <4 x float> @needs_extimg
+define <4 x float> @needs_extimg(float noundef %0, float noundef %1, <8 x i32> noundef %2, <4 x i32> noundef %3) #0 {
+  %5 = tail call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 noundef 15, float %0, float %1, <8 x i32> %2, <4 x i32> %3, i1 noundef false, i32 noundef 0, i32 noundef 0)
+  ret <4 x float> %5
+}
+
+; IR: define void @caller(
+define void @caller(float noundef %0, float noundef %1, <8 x i32> noundef %2, <4 x i32> noundef %3) {
+  ; EXTIMG: call void @needs_extimg(
+  ; NOEXTIMG: call void null
+  call void @needs_extimg(float %0, float %1, <8 x i32> %2, <4 x i32> %3)
+  ; IR: ret void
+  ret void
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg)
+
+attributes #0 = { "target-features"="+extended-image-insts" }


        


More information about the llvm-commits mailing list