[llvm] dcaa29c - Revert "[AMDGPU][gfx1250] Add `cu-store` subtarget feature (#150588)" (#157639)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 10 01:21:03 PDT 2025
Author: Pierre van Houtryve
Date: 2025-09-10T10:20:59+02:00
New Revision: dcaa29c8ed6cc3b62368cdd609f2d05a25541366
URL: https://github.com/llvm/llvm-project/commit/dcaa29c8ed6cc3b62368cdd609f2d05a25541366
DIFF: https://github.com/llvm/llvm-project/commit/dcaa29c8ed6cc3b62368cdd609f2d05a25541366.diff
LOG: Revert "[AMDGPU][gfx1250] Add `cu-store` subtarget feature (#150588)" (#157639)
This reverts commit be17791f2624f22b3ed24a2539406164a379125d.
This is not necessary for gfx1250 anymore.
Added:
Modified:
llvm/docs/AMDGPUUsage.rst
llvm/include/llvm/Support/AMDHSAKernelDescriptor.h
llvm/lib/Target/AMDGPU/AMDGPU.td
llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
llvm/lib/Target/AMDGPU/GCNSubtarget.h
llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
llvm/test/MC/AMDGPU/hsa-gfx1250-v4.s
llvm/test/MC/Disassembler/AMDGPU/kernel-descriptor-errors.test
llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx1250.s
Removed:
llvm/test/CodeGen/AMDGPU/gfx1250-no-scope-cu-stores.ll
################################################################################
diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index afd0d9e7539ef..2cddc3365d5d7 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -768,9 +768,6 @@ For example:
performant than code generated for XNACK replay
disabled.
- cu-stores TODO On GFX12.5, controls whether ``scope:SCOPE_CU`` stores may be used.
- If disabled, all stores will be done at ``scope:SCOPE_SE`` or greater.
-
=============== ============================ ==================================================
.. _amdgpu-target-id:
@@ -5114,9 +5111,7 @@ The fields used by CP for code objects before V3 also match those specified in
and must be 0,
>454 1 bit ENABLE_SGPR_PRIVATE_SEGMENT
_SIZE
- 455 1 bit USES_CU_STORES GFX12.5: Whether the ``cu-stores`` target attribute is enabled.
- If 0, then all stores are ``SCOPE_SE`` or higher.
- 457:456 2 bits Reserved, must be 0.
+ 457:455 3 bits Reserved, must be 0.
458 1 bit ENABLE_WAVEFRONT_SIZE32 GFX6-GFX9
Reserved, must be 0.
GFX10-GFX11
@@ -18254,8 +18249,6 @@ terminated by an ``.end_amdhsa_kernel`` directive.
GFX942)
``.amdhsa_user_sgpr_private_segment_size`` 0 GFX6-GFX12 Controls ENABLE_SGPR_PRIVATE_SEGMENT_SIZE in
:ref:`amdgpu-amdhsa-kernel-descriptor-v3-table`.
- ``.amdhsa_uses_cu_stores`` 0 GFX12.5 Controls USES_CU_STORES in
- :ref:`amdgpu-amdhsa-kernel-descriptor-v3-table`.
``.amdhsa_wavefront_size32`` Target GFX10-GFX12 Controls ENABLE_WAVEFRONT_SIZE32 in
Feature :ref:`amdgpu-amdhsa-kernel-descriptor-v3-table`.
Specific
diff --git a/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h b/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h
index fb9d68428cf18..418d2b36114c5 100644
--- a/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h
+++ b/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h
@@ -252,8 +252,7 @@ enum : int32_t {
KERNEL_CODE_PROPERTY(ENABLE_SGPR_DISPATCH_ID, 4, 1),
KERNEL_CODE_PROPERTY(ENABLE_SGPR_FLAT_SCRATCH_INIT, 5, 1),
KERNEL_CODE_PROPERTY(ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 6, 1),
- KERNEL_CODE_PROPERTY(RESERVED0, 7, 2),
- KERNEL_CODE_PROPERTY(USES_CU_STORES, 9, 1), // GFX12.5 +cu-stores
+ KERNEL_CODE_PROPERTY(RESERVED0, 7, 3),
KERNEL_CODE_PROPERTY(ENABLE_WAVEFRONT_SIZE32, 10, 1), // GFX10+
KERNEL_CODE_PROPERTY(USES_DYNAMIC_STACK, 11, 1),
KERNEL_CODE_PROPERTY(RESERVED1, 12, 4),
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index a366db1c580ba..ffbda14dcd849 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -289,12 +289,6 @@ def FeatureSafeCUPrefetch : SubtargetFeature<"safe-cu-prefetch",
"VMEM CU scope prefetches do not fail on illegal address"
>;
-def FeatureCUStores : SubtargetFeature<"cu-stores",
- "HasCUStores",
- "true",
- "Whether SCOPE_CU stores can be used on GFX12.5"
->;
-
def FeatureVcmpxExecWARHazard : SubtargetFeature<"vcmpx-exec-war-hazard",
"HasVcmpxExecWARHazard",
"true",
@@ -2042,7 +2036,6 @@ def FeatureISAVersion12_50 : FeatureSet<
[FeatureGFX12,
FeatureGFX1250Insts,
FeatureRequiresAlignedVGPRs,
- FeatureCUStores,
FeatureAddressableLocalMemorySize327680,
FeatureCuMode,
Feature1024AddressableVGPRs,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 1fff188c75819..29f8f9bc8b54c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -557,7 +557,6 @@ const MCExpr *AMDGPUAsmPrinter::getAmdhsaKernelCodeProperties(
MCContext &Ctx = MF.getContext();
uint16_t KernelCodeProperties = 0;
const GCNUserSGPRUsageInfo &UserSGPRInfo = MFI.getUserSGPRInfo();
- const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
if (UserSGPRInfo.hasPrivateSegmentBuffer()) {
KernelCodeProperties |=
@@ -587,13 +586,10 @@ const MCExpr *AMDGPUAsmPrinter::getAmdhsaKernelCodeProperties(
KernelCodeProperties |=
amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE;
}
- if (ST.isWave32()) {
+ if (MF.getSubtarget<GCNSubtarget>().isWave32()) {
KernelCodeProperties |=
amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
}
- if (isGFX1250(ST) && ST.hasCUStores()) {
- KernelCodeProperties |= amdhsa::KERNEL_CODE_PROPERTY_USES_CU_STORES;
- }
// CurrentProgramInfo.DynamicCallStack is a MCExpr and could be
// un-evaluatable at this point so it cannot be conditionally checked here.
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 2ce1e9e410b23..e420f2ad676f9 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -6181,12 +6181,6 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
ExprVal, ValRange);
if (Val)
ImpliedUserSGPRCount += 1;
- } else if (ID == ".amdhsa_uses_cu_stores") {
- if (!isGFX1250())
- return Error(IDRange.Start, "directive requires gfx12.5", IDRange);
-
- PARSE_BITS_ENTRY(KD.kernel_code_properties,
- KERNEL_CODE_PROPERTY_USES_CU_STORES, ExprVal, ValRange);
} else if (ID == ".amdhsa_wavefront_size32") {
EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
if (IVersion.Major < 10)
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index b50b2a2e6e23c..6f6039bf4ec21 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -2639,9 +2639,6 @@ Expected<bool> AMDGPUDisassembler::decodeKernelDescriptorDirective(
KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_size",
KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
- if (isGFX1250())
- PRINT_DIRECTIVE(".amdhsa_uses_cu_stores",
- KERNEL_CODE_PROPERTY_USES_CU_STORES);
if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0)
return createReservedKDBitsError(KERNEL_CODE_PROPERTY_RESERVED0,
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 556ec683f2ec6..e172a9c699fb1 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -252,7 +252,6 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool HasVmemPrefInsts = false;
bool HasSafeSmemPrefetch = false;
bool HasSafeCUPrefetch = false;
- bool HasCUStores = false;
bool HasVcmpxExecWARHazard = false;
bool HasLdsBranchVmemWARHazard = false;
bool HasNSAtoVMEMBug = false;
@@ -1017,8 +1016,6 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool hasSafeCUPrefetch() const { return HasSafeCUPrefetch; }
- bool hasCUStores() const { return HasCUStores; }
-
// Has s_cmpk_* instructions.
bool hasSCmpK() const { return getGeneration() < GFX12; }
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index 0bbab29dbda18..ff6a21239345d 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -448,11 +448,6 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_SHIFT,
amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
".amdhsa_user_sgpr_private_segment_size");
- if (isGFX1250(STI))
- PrintField(KD.kernel_code_properties,
- amdhsa::KERNEL_CODE_PROPERTY_USES_CU_STORES_SHIFT,
- amdhsa::KERNEL_CODE_PROPERTY_USES_CU_STORES,
- ".amdhsa_uses_cu_stores");
if (IVersion.Major >= 10)
PrintField(KD.kernel_code_properties,
amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32_SHIFT,
diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
index c20fcacb8fb26..f61c0d8f84b29 100644
--- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -2657,9 +2657,7 @@ bool SIGfx12CacheControl::finalizeStore(MachineInstr &MI, bool Atomic) const {
// GFX12.5 only: Require SCOPE_SE on stores that may hit the scratch address
// space.
- // We also require SCOPE_SE minimum if we not have the "cu-stores" feature.
- if (Scope == CPol::SCOPE_CU &&
- (!ST.hasCUStores() || TII->mayAccessScratchThroughFlat(MI)))
+ if (TII->mayAccessScratchThroughFlat(MI) && Scope == CPol::SCOPE_CU)
return setScope(MI, CPol::SCOPE_SE);
return Changed;
diff --git a/llvm/test/CodeGen/AMDGPU/gfx1250-no-scope-cu-stores.ll b/llvm/test/CodeGen/AMDGPU/gfx1250-no-scope-cu-stores.ll
deleted file mode 100644
index fcdba69c30213..0000000000000
--- a/llvm/test/CodeGen/AMDGPU/gfx1250-no-scope-cu-stores.ll
+++ /dev/null
@@ -1,88 +0,0 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -O3 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GCN,CU %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -O3 -mcpu=gfx1250 -mattr=-cu-stores < %s | FileCheck --check-prefixes=GCN,NOCU %s
-
-; Check that if -cu-stores is used, we use SCOPE_SE minimum on all stores.
-
-; GCN: flat_store:
-; CU: flat_store_b32 v{{.*}}, v{{.*}}, s{{.*}} scope:SCOPE_SE
-; NOCU: flat_store_b32 v{{.*}}, v{{.*}}, s{{.*}} scope:SCOPE_SE
-; GCN: .amdhsa_kernel flat_store
-; CU: .amdhsa_uses_cu_stores 1
-; NOCU: .amdhsa_uses_cu_stores 0
-define amdgpu_kernel void @flat_store(ptr %dst, i32 %val) {
-entry:
- store i32 %val, ptr %dst
- ret void
-}
-
-; GCN: global_store:
-; CU: global_store_b32 v{{.*}}, v{{.*}}, s{{.*}}{{$}}
-; NOCU: global_store_b32 v{{.*}}, v{{.*}}, s{{.*}} scope:SCOPE_SE
-; GCN: .amdhsa_kernel global_store
-; CU: .amdhsa_uses_cu_stores 1
-; NOCU: .amdhsa_uses_cu_stores 0
-define amdgpu_kernel void @global_store(ptr addrspace(1) %dst, i32 %val) {
-entry:
- store i32 %val, ptr addrspace(1) %dst
- ret void
-}
-
-; GCN: local_store:
-; CU: ds_store_b32 v{{.*}}, v{{.*}}{{$}}
-; NOCU: ds_store_b32 v{{.*}}, v{{.*}}{{$}}
-; GCN: .amdhsa_kernel local_store
-; CU: .amdhsa_uses_cu_stores 1
-; NOCU: .amdhsa_uses_cu_stores 0
-define amdgpu_kernel void @local_store(ptr addrspace(3) %dst, i32 %val) {
-entry:
- store i32 %val, ptr addrspace(3) %dst
- ret void
-}
-
-; GCN: scratch_store:
-; CU: scratch_store_b32 off, v{{.*}}, s{{.*}} scope:SCOPE_SE
-; NOCU: scratch_store_b32 off, v{{.*}}, s{{.*}} scope:SCOPE_SE
-; GCN: .amdhsa_kernel scratch_store
-; CU: .amdhsa_uses_cu_stores 1
-; NOCU: .amdhsa_uses_cu_stores 0
-define amdgpu_kernel void @scratch_store(ptr addrspace(5) %dst, i32 %val) {
-entry:
- store i32 %val, ptr addrspace(5) %dst
- ret void
-}
-
-; GCN: flat_atomic_store:
-; CU: flat_store_b32 v{{.*}}, v{{.*}}, s{{.*}} scope:SCOPE_SE
-; NOCU: flat_store_b32 v{{.*}}, v{{.*}}, s{{.*}} scope:SCOPE_SE
-; GCN: .amdhsa_kernel flat_atomic_store
-; CU: .amdhsa_uses_cu_stores 1
-; NOCU: .amdhsa_uses_cu_stores 0
-define amdgpu_kernel void @flat_atomic_store(ptr %dst, i32 %val) {
-entry:
- store atomic i32 %val, ptr %dst syncscope("wavefront") unordered, align 4
- ret void
-}
-
-; GCN: global_atomic_store:
-; CU: global_store_b32 v{{.*}}, v{{.*}}, s{{.*}}{{$}}
-; NOCU: global_store_b32 v{{.*}}, v{{.*}}, s{{.*}} scope:SCOPE_SE
-; GCN: .amdhsa_kernel global_atomic_store
-; CU: .amdhsa_uses_cu_stores 1
-; NOCU: .amdhsa_uses_cu_stores 0
-define amdgpu_kernel void @global_atomic_store(ptr addrspace(1) %dst, i32 %val) {
-entry:
- store atomic i32 %val, ptr addrspace(1) %dst syncscope("wavefront") unordered, align 4
- ret void
-}
-
-; GCN: local_atomic_store:
-; CU: ds_store_b32 v{{.*}}, v{{.*}}{{$}}
-; NOCU: ds_store_b32 v{{.*}}, v{{.*}}{{$}}
-; GCN: .amdhsa_kernel local_atomic_store
-; CU: .amdhsa_uses_cu_stores 1
-; NOCU: .amdhsa_uses_cu_stores 0
-define amdgpu_kernel void @local_atomic_store(ptr addrspace(3) %dst, i32 %val) {
-entry:
- store atomic i32 %val, ptr addrspace(3) %dst syncscope("wavefront") unordered, align 4
- ret void
-}
diff --git a/llvm/test/MC/AMDGPU/hsa-gfx1250-v4.s b/llvm/test/MC/AMDGPU/hsa-gfx1250-v4.s
index e19be77e901fa..3c693610bee51 100644
--- a/llvm/test/MC/AMDGPU/hsa-gfx1250-v4.s
+++ b/llvm/test/MC/AMDGPU/hsa-gfx1250-v4.s
@@ -40,7 +40,7 @@
// OBJDUMP-NEXT: 0040 01000000 01000000 0c000000 00000000
// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00c00000
-// OBJDUMP-NEXT: 0070 005021c4 410f007f 5e068200 00000000
+// OBJDUMP-NEXT: 0070 005021c4 410f007f 5e048200 00000000
// special_sgpr
// OBJDUMP-NEXT: 0080 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 0090 00000000 00000000 00000000 00000000
@@ -127,7 +127,6 @@ max_vgprs:
.amdhsa_user_sgpr_kernarg_preload_length 2
.amdhsa_user_sgpr_kernarg_preload_offset 1
.amdhsa_user_sgpr_private_segment_size 1
- .amdhsa_uses_cu_stores 1
.amdhsa_wavefront_size32 1
.amdhsa_enable_private_segment 1
.amdhsa_system_sgpr_workgroup_id_x 0
@@ -168,7 +167,6 @@ max_vgprs:
// ASM-NEXT: .amdhsa_user_sgpr_kernarg_preload_length 2
// ASM-NEXT: .amdhsa_user_sgpr_kernarg_preload_offset 1
// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 1
-// ASM-NEXT: .amdhsa_uses_cu_stores 1
// ASM-NEXT: .amdhsa_wavefront_size32 1
// ASM-NEXT: .amdhsa_enable_private_segment 1
// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 0
diff --git a/llvm/test/MC/Disassembler/AMDGPU/kernel-descriptor-errors.test b/llvm/test/MC/Disassembler/AMDGPU/kernel-descriptor-errors.test
index 369005f4ea432..fdca11b95caa6 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/kernel-descriptor-errors.test
+++ b/llvm/test/MC/Disassembler/AMDGPU/kernel-descriptor-errors.test
@@ -13,10 +13,10 @@
# RES_4_2: ; error decoding test.kd: kernel descriptor reserved bits in range (511:480) set
# RES_4_2-NEXT: ; decoding failed region as bytes
-# RUN: yaml2obj %s -DGPU=GFX90A -DKD=00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000003000000000000 \
-# RUN: | llvm-objdump --disassemble-symbols=test.kd - | FileCheck %s --check-prefix=RES_456
-# RES_456: ; error decoding test.kd: kernel descriptor reserved bits in range (456:455) set
-# RES_456-NEXT: ; decoding failed region as bytes
+# RUN: yaml2obj %s -DGPU=GFX90A -DKD=00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000006000000000000 \
+# RUN: | llvm-objdump --disassemble-symbols=test.kd - | FileCheck %s --check-prefix=RES_457
+# RES_457: ; error decoding test.kd: kernel descriptor reserved bits in range (457:455) set
+# RES_457-NEXT: ; decoding failed region as bytes
# RUN: yaml2obj %s -DGPU=GFX90A -DKD=0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000c000000000000 \
# RUN: | llvm-objdump --disassemble-symbols=test.kd - | FileCheck %s --check-prefix=WF32
diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx1250.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx1250.s
index 99a4df3e5adfb..3e96ea3c67380 100644
--- a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx1250.s
+++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx1250.s
@@ -49,7 +49,6 @@
; CHECK-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0
; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
-; CHECK-NEXT: .amdhsa_uses_cu_stores 1
; CHECK-NEXT: .amdhsa_wavefront_size32 1
; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0
; CHECK-NEXT: .end_amdhsa_kernel
@@ -57,7 +56,6 @@
.amdhsa_next_free_vgpr 32
.amdhsa_next_free_sgpr 32
.amdhsa_inst_pref_size 0
- .amdhsa_uses_cu_stores 1
.end_amdhsa_kernel
;--- 2.s
@@ -107,7 +105,6 @@
; CHECK-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0
; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
-; CHECK-NEXT: .amdhsa_uses_cu_stores 0
; CHECK-NEXT: .amdhsa_wavefront_size32 1
; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0
; CHECK-NEXT: .end_amdhsa_kernel
@@ -116,6 +113,5 @@
.amdhsa_next_free_vgpr 32
.amdhsa_next_free_sgpr 32
.amdhsa_named_barrier_count 7
- .amdhsa_uses_cu_stores 0
.amdhsa_inst_pref_size 63
.end_amdhsa_kernel
More information about the llvm-commits
mailing list