[clang] [flang] [llvm] [AMDGPU] Add "lds-buffer-load-insts" attribute for all targets < gfx11 (PR #133055)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 26 02:08:58 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
@llvm/pr-subscribers-clang
Author: Juan Manuel Martinez CaamaƱo (jmmartinez)
<details>
<summary>Changes</summary>
This are used to restrict the availability of buffer_load_lds intrinsics to targets that actually have this instructions.
---
Full diff: https://github.com/llvm/llvm-project/pull/133055.diff
9 Files Affected:
- (modified) clang/lib/Basic/Targets/AMDGPU.cpp (+1-1)
- (modified) clang/test/CodeGen/link-builtin-bitcode.c (+3-3)
- (modified) clang/test/CodeGenCXX/dynamic-cast-address-space.cpp (+2-2)
- (modified) flang/test/Lower/OpenMP/target_cpu_features.f90 (+2-2)
- (modified) llvm/lib/Target/AMDGPU/AMDGPU.td (+11-5)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp (+2-1)
- (modified) llvm/lib/Target/AMDGPU/GCNSubtarget.h (+3)
- (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+2-1)
- (modified) llvm/lib/TargetParser/TargetParser.cpp (+5)
``````````diff
diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp
index a42b4589fb5ac..ed578890fc71f 100644
--- a/clang/lib/Basic/Targets/AMDGPU.cpp
+++ b/clang/lib/Basic/Targets/AMDGPU.cpp
@@ -260,7 +260,7 @@ AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
CUMode = !(GPUFeatures & llvm::AMDGPU::FEATURE_WGP);
- for (auto F : {"image-insts", "gws"})
+ for (auto F : {"image-insts", "gws", "lds-buffer-load-insts"})
ReadOnlyFeatures.insert(F);
HalfArgsAndReturns = true;
}
diff --git a/clang/test/CodeGen/link-builtin-bitcode.c b/clang/test/CodeGen/link-builtin-bitcode.c
index 470180efa4247..efce9d0af5d49 100644
--- a/clang/test/CodeGen/link-builtin-bitcode.c
+++ b/clang/test/CodeGen/link-builtin-bitcode.c
@@ -44,6 +44,6 @@ int bar() { return no_attr() + attr_in_target() + attr_not_in_target() + attr_in
// CHECK-SAME: () #[[ATTR_INCOMPATIBLE:[0-9]+]] {
// CHECK: attributes #[[ATTR_BAR]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" }
-// CHECK: attributes #[[ATTR_COMPATIBLE]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gws,+image-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" }
-// CHECK: attributes #[[ATTR_EXTEND]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+extended-image-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gws,+image-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" }
-// CHECK: attributes #[[ATTR_INCOMPATIBLE]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx90a-insts,+gws,+image-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,-gfx9-insts" }
+// CHECK: attributes #[[ATTR_COMPATIBLE]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gws,+image-insts,+lds-buffer-load-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" }
+// CHECK: attributes #[[ATTR_EXTEND]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+extended-image-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gws,+image-insts,+lds-buffer-load-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" }
+// CHECK: attributes #[[ATTR_INCOMPATIBLE]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx90a-insts,+gws,+image-insts,+lds-buffer-load-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,-gfx9-insts" }
diff --git a/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp b/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp
index 5d49cc0544b9c..78ccfad24cb27 100644
--- a/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp
+++ b/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp
@@ -112,9 +112,9 @@ const B& f(A *a) {
// CHECK: attributes #[[ATTR3]] = { nounwind }
// CHECK: attributes #[[ATTR4]] = { noreturn }
//.
-// WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR0]] = { mustprogress noinline optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+gws,+image-insts,+mai-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+wavefrontsize32,+wavefrontsize64" }
+// WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR0]] = { mustprogress noinline optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+gws,+image-insts,+lds-buffer-load-insts,+mai-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+wavefrontsize32,+wavefrontsize64" }
// WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR1:[0-9]+]] = { nounwind willreturn memory(read) }
-// WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR2:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+gws,+image-insts,+mai-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+wavefrontsize32,+wavefrontsize64" }
+// WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR2:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+gws,+image-insts,+lds-buffer-load-insts,+mai-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+wavefrontsize32,+wavefrontsize64" }
// WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR3]] = { nounwind }
// WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR4]] = { noreturn }
//.
diff --git a/flang/test/Lower/OpenMP/target_cpu_features.f90 b/flang/test/Lower/OpenMP/target_cpu_features.f90
index ea8efcf5d256b..87079b30860f6 100644
--- a/flang/test/Lower/OpenMP/target_cpu_features.f90
+++ b/flang/test/Lower/OpenMP/target_cpu_features.f90
@@ -11,8 +11,8 @@
!AMDGCN-SAME: fir.target_features = #llvm.target_features<["+16-bit-insts", "+ci-insts",
!AMDGCN-SAME: "+dl-insts", "+dot1-insts", "+dot10-insts", "+dot2-insts", "+dot3-insts",
!AMDGCN-SAME: "+dot4-insts", "+dot5-insts", "+dot6-insts", "+dot7-insts", "+dpp",
-!AMDGCN-SAME: "+gfx8-insts", "+gfx9-insts", "+gws", "+image-insts", "+mai-insts",
-!AMDGCN-SAME: "+s-memrealtime", "+s-memtime-inst", "+wavefrontsize64"]>
+!AMDGCN-SAME: "+gfx8-insts", "+gfx9-insts", "+gws", "+image-insts", "+lds-buffer-load-insts",
+!AMDGCN-SAME: "+mai-insts", "+s-memrealtime", "+s-memtime-inst", "+wavefrontsize64"]>
!NVPTX: module attributes {
!NVPTX-SAME: fir.target_cpu = "sm_80"
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 84619dd656f35..a3ddafde3c2e0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1269,6 +1269,12 @@ def FeatureDynamicVGPRBlockSize32 : SubtargetFeature<"dynamic-vgpr-block-size-32
"Use a block size of 32 for dynamic VGPR allocation (default is 16)"
>;
+def FeatureLDSBufferLoad : SubtargetFeature<"lds-buffer-load-insts",
+ "HasLDSBufferLoad",
+ "true",
+ "The platform has buffer_load lds instructions"
+>;
+
// Dummy feature used to disable assembler instructions.
def FeatureDisable : SubtargetFeature<"",
"FeatureDisable","true",
@@ -1290,7 +1296,7 @@ def FeatureSouthernIslands : GCNSubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
FeatureTrigReducedRange, FeatureExtendedImageInsts, FeatureImageInsts,
FeatureGDS, FeatureGWS, FeatureDefaultComponentZero,
FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts,
- FeatureVmemWriteVgprInOrder
+ FeatureVmemWriteVgprInOrder, FeatureLDSBufferLoad
]
>;
@@ -1304,7 +1310,7 @@ def FeatureSeaIslands : GCNSubtargetFeatureGeneration<"SEA_ISLANDS",
FeatureImageInsts, FeatureGDS, FeatureGWS, FeatureDefaultComponentZero,
FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts,
FeatureAtomicFMinFMaxF32FlatInsts, FeatureAtomicFMinFMaxF64FlatInsts,
- FeatureVmemWriteVgprInOrder
+ FeatureVmemWriteVgprInOrder, FeatureLDSBufferLoad
]
>;
@@ -1320,7 +1326,7 @@ def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
FeatureGFX7GFX8GFX9Insts, FeatureSMemTimeInst, FeatureMadMacF32Insts,
FeatureDsSrc2Insts, FeatureExtendedImageInsts, FeatureFastDenormalF32,
FeatureUnalignedBufferAccess, FeatureImageInsts, FeatureGDS, FeatureGWS,
- FeatureDefaultComponentZero, FeatureVmemWriteVgprInOrder
+ FeatureDefaultComponentZero, FeatureVmemWriteVgprInOrder, FeatureLDSBufferLoad
]
>;
@@ -1339,7 +1345,7 @@ def FeatureGFX9 : GCNSubtargetFeatureGeneration<"GFX9",
FeatureA16, FeatureSMemTimeInst, FeatureFastDenormalF32, FeatureSupportsXNACK,
FeatureUnalignedBufferAccess, FeatureUnalignedScratchAccess,
FeatureUnalignedDSAccess, FeatureNegativeScratchOffsetBug, FeatureGWS,
- FeatureDefaultComponentZero,FeatureVmemWriteVgprInOrder
+ FeatureDefaultComponentZero,FeatureVmemWriteVgprInOrder, FeatureLDSBufferLoad
]
>;
@@ -1363,7 +1369,7 @@ def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10",
FeatureDefaultComponentZero, FeatureMaxHardClauseLength63,
FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts,
FeatureAtomicFMinFMaxF32FlatInsts, FeatureAtomicFMinFMaxF64FlatInsts,
- FeatureVmemWriteVgprInOrder
+ FeatureVmemWriteVgprInOrder, FeatureLDSBufferLoad
]
>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index d1b1cb788b7d2..f6905b641cbc6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -3368,7 +3368,8 @@ bool AMDGPUInstructionSelector::selectG_INSERT_VECTOR_ELT(
}
bool AMDGPUInstructionSelector::selectBufferLoadLds(MachineInstr &MI) const {
- assert(!AMDGPU::isGFX12Plus(STI));
+ if (!Subtarget->hasLDSBufferLoad())
+ return false;
unsigned Opc;
unsigned Size = MI.getOperand(3).getImm();
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 7384278d81cc1..e00ef6f6f5ac2 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -193,6 +193,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool SupportsSRAMECC = false;
bool DynamicVGPR = false;
bool DynamicVGPRBlockSize32 = false;
+ bool HasLDSBufferLoad = false;
// This should not be used directly. 'TargetID' tracks the dynamic settings
// for SRAMECC.
@@ -1318,6 +1319,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
return hasGFX950Insts();
}
+ bool hasLDSBufferLoad() const { return HasLDSBufferLoad; }
+
bool hasSALUFloatInsts() const { return HasSALUFloatInsts; }
bool hasPseudoScalarTrans() const { return HasPseudoScalarTrans; }
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index b0c18715ef810..1dbfd89d2cb38 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -10104,7 +10104,8 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
case Intrinsic::amdgcn_raw_ptr_buffer_load_lds:
case Intrinsic::amdgcn_struct_buffer_load_lds:
case Intrinsic::amdgcn_struct_ptr_buffer_load_lds: {
- assert(!AMDGPU::isGFX12Plus(*Subtarget));
+ if (!Subtarget->hasLDSBufferLoad())
+ return SDValue();
unsigned Opc;
bool HasVIndex =
IntrinsicID == Intrinsic::amdgcn_struct_buffer_load_lds ||
diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp
index 8731a16b88a5c..9ff70ebad1cdc 100644
--- a/llvm/lib/TargetParser/TargetParser.cpp
+++ b/llvm/lib/TargetParser/TargetParser.cpp
@@ -374,6 +374,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
Features["prng-inst"] = true;
Features["wavefrontsize32"] = true;
Features["wavefrontsize64"] = true;
+ Features["lds-buffer-load-insts"] = true;
} else if (T.isAMDGCN()) {
AMDGPU::GPUKind Kind = parseArchAMDGCN(GPU);
switch (Kind) {
@@ -459,6 +460,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
Features["s-memrealtime"] = true;
Features["s-memtime-inst"] = true;
Features["gws"] = true;
+ Features["lds-buffer-load-insts"] = true;
break;
case GK_GFX1012:
case GK_GFX1011:
@@ -483,6 +485,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
Features["s-memrealtime"] = true;
Features["s-memtime-inst"] = true;
Features["gws"] = true;
+ Features["lds-buffer-load-insts"] = true;
break;
case GK_GFX950:
Features["bitop3-insts"] = true;
@@ -533,6 +536,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
Features["ci-insts"] = true;
Features["s-memtime-inst"] = true;
Features["gws"] = true;
+ Features["lds-buffer-load-insts"] = true;
break;
case GK_GFX90A:
Features["gfx90a-insts"] = true;
@@ -585,6 +589,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
Features["image-insts"] = true;
Features["s-memtime-inst"] = true;
Features["gws"] = true;
+ Features["lds-buffer-load-insts"] = true;
break;
case GK_NONE:
break;
``````````
</details>
https://github.com/llvm/llvm-project/pull/133055
More information about the llvm-commits
mailing list