[Openmp-commits] [lldb] [llvm] [mlir] [openmp] [libc] [flang] [clang] [AMDGPU] GFX12 global_atomic_ordered_add_b64 instruction and intrinsic (PR #76149)
Jay Foad via Openmp-commits
openmp-commits at lists.llvm.org
Thu Dec 21 06:57:45 PST 2023
https://github.com/jayfoad updated https://github.com/llvm/llvm-project/pull/76149
From b14a554a15e4de88c9afc428f9c6898090e6eb23 Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Thu, 21 Dec 2023 12:00:26 +0000
Subject: [PATCH] [AMDGPU] GFX12 global_atomic_ordered_add_b64 instruction and
intrinsic
---
llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 10 ++-
llvm/lib/Target/AMDGPU/AMDGPUInstructions.td | 1 +
.../Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 1 +
.../Target/AMDGPU/AMDGPUSearchableTables.td | 1 +
llvm/lib/Target/AMDGPU/FLATInstructions.td | 11 +++-
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 1 +
...vm.amdgcn.global.atomic.ordered.add.b64.ll | 65 +++++++++++++++++++
llvm/test/MC/AMDGPU/gfx11_unsupported.s | 3 +
llvm/test/MC/AMDGPU/gfx12_asm_vflat.s | 24 +++++++
.../Disassembler/AMDGPU/gfx12_dasm_vflat.txt | 12 ++++
10 files changed, 124 insertions(+), 5 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.atomic.ordered.add.b64.ll
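Note (not part of the applied patch): a minimal LLVM IR sketch of how the new intrinsic is called, mirroring the declaration and call pattern from the CodeGen test added below; the kernel and value names here are illustrative only.

declare i64 @llvm.amdgcn.global.atomic.ordered.add.b64(ptr addrspace(1), i64)

define amdgpu_kernel void @ordered_add_example(ptr addrspace(1) %addr, i64 %in, ptr addrspace(1) %out) {
entry:
  ; Ordered 64-bit atomic add on a global (addrspace(1)) pointer; the intrinsic
  ; returns an i64 result, which is stored here as in the test's _rtn case.
  %val = call i64 @llvm.amdgcn.global.atomic.ordered.add.b64(ptr addrspace(1) %addr, i64 %in)
  store i64 %val, ptr addrspace(1) %out
  ret void
}

; Selected only on GFX12 targets, e.g.:
;   llc -march=amdgcn -mcpu=gfx1200 < example.ll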
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 51bd9b63c127ed..3985c8871e1615 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -10,6 +10,8 @@
//
//===----------------------------------------------------------------------===//
+def global_ptr_ty : LLVMQualPointerType<1>;
+
class AMDGPUReadPreloadRegisterIntrinsic
: DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>;
@@ -2353,10 +2355,10 @@ def int_amdgcn_s_get_waveid_in_workgroup :
Intrinsic<[llvm_i32_ty], [],
[IntrNoMem, IntrHasSideEffects, IntrWillReturn, IntrNoCallback, IntrNoFree]>;
-class AMDGPUGlobalAtomicRtn<LLVMType vt> : Intrinsic <
+class AMDGPUGlobalAtomicRtn<LLVMType vt, LLVMType pt = llvm_anyptr_ty> : Intrinsic <
[vt],
- [llvm_anyptr_ty, // vaddr
- vt], // vdata(VGPR)
+ [pt, // vaddr
+ vt], // vdata(VGPR)
[IntrArgMemOnly, IntrWillReturn, NoCapture<ArgIndex<0>>, IntrNoCallback, IntrNoFree], "",
[SDNPMemOperand]>;
@@ -2486,6 +2488,8 @@ def int_amdgcn_permlanex16_var : ClangBuiltin<"__builtin_amdgcn_permlanex16_var"
[IntrNoMem, IntrConvergent, IntrWillReturn,
ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, IntrNoCallback, IntrNoFree]>;
+def int_amdgcn_global_atomic_ordered_add_b64 : AMDGPUGlobalAtomicRtn<llvm_i64_ty, global_ptr_ty>;
+
def int_amdgcn_flat_atomic_fmin_num : AMDGPUGlobalAtomicRtn<llvm_anyfloat_ty>;
def int_amdgcn_flat_atomic_fmax_num : AMDGPUGlobalAtomicRtn<llvm_anyfloat_ty>;
def int_amdgcn_global_atomic_fmin_num : AMDGPUGlobalAtomicRtn<llvm_anyfloat_ty>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index eaf72d7157ee2d..36e07d944c942c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -642,6 +642,7 @@ defm int_amdgcn_global_atomic_fmax : noret_op;
defm int_amdgcn_global_atomic_csub : noret_op;
defm int_amdgcn_flat_atomic_fadd : local_addr_space_atomic_op;
defm int_amdgcn_ds_fadd_v2bf16 : noret_op;
+defm int_amdgcn_global_atomic_ordered_add_b64 : noret_op;
defm int_amdgcn_flat_atomic_fmin_num : noret_op;
defm int_amdgcn_flat_atomic_fmax_num : noret_op;
defm int_amdgcn_global_atomic_fmin_num : noret_op;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index c9412f720c62ec..fba060464a6e74 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -4690,6 +4690,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_flat_atomic_fmax_num:
case Intrinsic::amdgcn_global_atomic_fadd_v2bf16:
case Intrinsic::amdgcn_flat_atomic_fadd_v2bf16:
+ case Intrinsic::amdgcn_global_atomic_ordered_add_b64:
return getDefaultMappingAllVGPR(MI);
case Intrinsic::amdgcn_ds_ordered_add:
case Intrinsic::amdgcn_ds_ordered_swap:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td b/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
index beb670669581f1..4cc8871a00fe1f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
@@ -243,6 +243,7 @@ def : SourceOfDivergence<int_amdgcn_global_atomic_fmin>;
def : SourceOfDivergence<int_amdgcn_global_atomic_fmax>;
def : SourceOfDivergence<int_amdgcn_global_atomic_fmin_num>;
def : SourceOfDivergence<int_amdgcn_global_atomic_fmax_num>;
+def : SourceOfDivergence<int_amdgcn_global_atomic_ordered_add_b64>;
def : SourceOfDivergence<int_amdgcn_flat_atomic_fadd>;
def : SourceOfDivergence<int_amdgcn_flat_atomic_fmin>;
def : SourceOfDivergence<int_amdgcn_flat_atomic_fmax>;
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 0dd2b3f5c2c912..615f8cd54d8f9c 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -926,9 +926,11 @@ defm GLOBAL_LOAD_LDS_USHORT : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_usho
defm GLOBAL_LOAD_LDS_SSHORT : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_sshort">;
defm GLOBAL_LOAD_LDS_DWORD : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_dword">;
-} // End is_flat_global = 1
-
+let SubtargetPredicate = isGFX12Plus in {
+ defm GLOBAL_ATOMIC_ORDERED_ADD_B64 : FLAT_Global_Atomic_Pseudo <"global_atomic_ordered_add_b64", VReg_64, i64>;
+} // End SubtargetPredicate = isGFX12Plus
+} // End is_flat_global = 1
let SubtargetPredicate = HasFlatScratchInsts in {
defm SCRATCH_LOAD_UBYTE : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte", VGPR_32>;
@@ -1529,6 +1531,10 @@ defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SWAP_X2", "atomic_swap_global", i64>
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_CMPSWAP_X2", "AMDGPUatomic_cmp_swap_global", i64, v2i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_XOR_X2", "atomic_load_xor_global", i64>;
+let OtherPredicates = [isGFX12Plus] in {
+ defm : GlobalFLATAtomicPatsRtn <"GLOBAL_ATOMIC_ORDERED_ADD_B64", "int_amdgcn_global_atomic_ordered_add_b64", i64, i64, /* isIntr */ 1>;
+}
+
let OtherPredicates = [isGFX10Plus] in {
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMIN", "atomic_load_fmin_global", f32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMAX", "atomic_load_fmax_global", f32>;
@@ -2654,6 +2660,7 @@ defm GLOBAL_ATOMIC_DEC_U64 : VGLOBAL_Real_Atomics_gfx12<0x04d, "GLOBAL_A
defm GLOBAL_ATOMIC_MIN_NUM_F32 : VGLOBAL_Real_Atomics_gfx12<0x051, "GLOBAL_ATOMIC_FMIN", "global_atomic_min_num_f32", true, "global_atomic_min_f32">;
defm GLOBAL_ATOMIC_MAX_NUM_F32 : VGLOBAL_Real_Atomics_gfx12<0x052, "GLOBAL_ATOMIC_FMAX", "global_atomic_max_num_f32", true, "global_atomic_max_f32">;
defm GLOBAL_ATOMIC_ADD_F32 : VGLOBAL_Real_Atomics_gfx12<0x056, "GLOBAL_ATOMIC_ADD_F32", "global_atomic_add_f32">;
+defm GLOBAL_ATOMIC_ORDERED_ADD_B64 : VGLOBAL_Real_Atomics_gfx12<0x073, "GLOBAL_ATOMIC_ORDERED_ADD_B64", "global_atomic_ordered_add_b64">;
// ENC_VSCRATCH.
defm SCRATCH_LOAD_U8 : VSCRATCH_Real_AllAddr_gfx12<0x10, "SCRATCH_LOAD_UBYTE", "scratch_load_u8", true>;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 4f4bc45e49b43e..715f67876994af 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1243,6 +1243,7 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
case Intrinsic::amdgcn_global_atomic_fmax:
case Intrinsic::amdgcn_global_atomic_fmin_num:
case Intrinsic::amdgcn_global_atomic_fmax_num:
+ case Intrinsic::amdgcn_global_atomic_ordered_add_b64:
case Intrinsic::amdgcn_flat_atomic_fadd:
case Intrinsic::amdgcn_flat_atomic_fmin:
case Intrinsic::amdgcn_flat_atomic_fmax:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.atomic.ordered.add.b64.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.atomic.ordered.add.b64.ll
new file mode 100644
index 00000000000000..6a6c5b33e0dd8f
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.atomic.ordered.add.b64.ll
@@ -0,0 +1,65 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12-SDAG %s
+; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12-GISEL %s
+
+declare i64 @llvm.amdgcn.global.atomic.ordered.add.b64(ptr addrspace(1), i64)
+
+define amdgpu_kernel void @global_atomic_ordered_add_b64_no_rtn(ptr addrspace(1) %addr, i64 %in) {
+; GFX12-SDAG-LABEL: global_atomic_ordered_add_b64_no_rtn:
+; GFX12-SDAG: ; %bb.0: ; %entry
+; GFX12-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; GFX12-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3
+; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, s2
+; GFX12-SDAG-NEXT: global_atomic_ordered_add_b64 v[0:1], v2, v[0:1], s[0:1] offset:-32 th:TH_ATOMIC_RETURN
+; GFX12-SDAG-NEXT: s_endpgm
+;
+; GFX12-GISEL-LABEL: global_atomic_ordered_add_b64_no_rtn:
+; GFX12-GISEL: ; %bb.0: ; %entry
+; GFX12-GISEL-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; GFX12-GISEL-NEXT: v_mov_b32_e32 v2, 0
+; GFX12-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
+; GFX12-GISEL-NEXT: global_atomic_ordered_add_b64 v[0:1], v2, v[0:1], s[0:1] offset:-32 th:TH_ATOMIC_RETURN
+; GFX12-GISEL-NEXT: s_endpgm
+entry:
+ %gep = getelementptr i64, ptr addrspace(1) %addr, i32 -4
+ %unused = call i64 @llvm.amdgcn.global.atomic.ordered.add.b64(ptr addrspace(1) %gep, i64 %in)
+ ret void
+}
+
+define amdgpu_kernel void @global_atomic_ordered_add_b64_rtn(ptr addrspace(1) %addr, i64 %in, ptr addrspace(1) %use) {
+; GFX12-SDAG-LABEL: global_atomic_ordered_add_b64_rtn:
+; GFX12-SDAG: ; %bb.0: ; %entry
+; GFX12-SDAG-NEXT: s_load_b128 s[4:7], s[0:1], 0x24
+; GFX12-SDAG-NEXT: v_mov_b32_e32 v2, 0
+; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x34
+; GFX12-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX12-SDAG-NEXT: v_dual_mov_b32 v1, s7 :: v_dual_mov_b32 v0, s6
+; GFX12-SDAG-NEXT: global_atomic_ordered_add_b64 v[0:1], v2, v[0:1], s[4:5] offset:32 th:TH_ATOMIC_RETURN
+; GFX12-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX12-SDAG-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX12-SDAG-NEXT: s_nop 0
+; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX12-SDAG-NEXT: s_endpgm
+;
+; GFX12-GISEL-LABEL: global_atomic_ordered_add_b64_rtn:
+; GFX12-GISEL: ; %bb.0: ; %entry
+; GFX12-GISEL-NEXT: s_clause 0x1
+; GFX12-GISEL-NEXT: s_load_b128 s[4:7], s[0:1], 0x24
+; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x34
+; GFX12-GISEL-NEXT: v_mov_b32_e32 v2, 0
+; GFX12-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7
+; GFX12-GISEL-NEXT: global_atomic_ordered_add_b64 v[0:1], v2, v[0:1], s[4:5] offset:32 th:TH_ATOMIC_RETURN
+; GFX12-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX12-GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX12-GISEL-NEXT: s_nop 0
+; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX12-GISEL-NEXT: s_endpgm
+entry:
+ %gep = getelementptr i64, ptr addrspace(1) %addr, i32 4
+ %val = call i64 @llvm.amdgcn.global.atomic.ordered.add.b64(ptr addrspace(1) %gep, i64 %in)
+ store i64 %val, ptr addrspace(1) %use
+ ret void
+}
diff --git a/llvm/test/MC/AMDGPU/gfx11_unsupported.s b/llvm/test/MC/AMDGPU/gfx11_unsupported.s
index 89078c1ad4e049..e01eb05e85588d 100644
--- a/llvm/test/MC/AMDGPU/gfx11_unsupported.s
+++ b/llvm/test/MC/AMDGPU/gfx11_unsupported.s
@@ -2013,3 +2013,6 @@ ds_sub_clamp_rtn_u32 v5, v1, v2
ds_sub_clamp_u32 v1, v2
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+global_atomic_ordered_add_b64 v0, v[2:3], s[0:1] offset:64
+// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vflat.s b/llvm/test/MC/AMDGPU/gfx12_asm_vflat.s
index c0ffc5247d90e8..95d352b421a284 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vflat.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vflat.s
@@ -1266,6 +1266,30 @@ global_atomic_or_b64 v[1:2], v[0:1], v[2:3], off offset:-64 th:TH_ATOMIC_RETURN
global_atomic_or_b64 v[1:2], v[0:1], v[2:3], off offset:64 th:TH_ATOMIC_RETURN
// GFX12: encoding: [0x7c,0x80,0x12,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00]
+global_atomic_ordered_add_b64 v0, v[2:3], s[0:1] offset:-64
+// GFX12: encoding: [0x00,0xc0,0x1c,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff]
+
+global_atomic_ordered_add_b64 v0, v[2:3], s[0:1] offset:64
+// GFX12: encoding: [0x00,0xc0,0x1c,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00]
+
+global_atomic_ordered_add_b64 v[0:1], v[2:3], off offset:-64
+// GFX12: encoding: [0x7c,0xc0,0x1c,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff]
+
+global_atomic_ordered_add_b64 v[0:1], v[2:3], off offset:64
+// GFX12: encoding: [0x7c,0xc0,0x1c,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00]
+
+global_atomic_ordered_add_b64 v[1:2], v0, v[2:3], s[0:1] offset:-64 th:TH_ATOMIC_RETURN
+// GFX12: encoding: [0x00,0xc0,0x1c,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff]
+
+global_atomic_ordered_add_b64 v[1:2], v0, v[2:3], s[0:1] offset:64 th:TH_ATOMIC_RETURN
+// GFX12: encoding: [0x00,0xc0,0x1c,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00]
+
+global_atomic_ordered_add_b64 v[1:2], v[0:1], v[2:3], off offset:-64 th:TH_ATOMIC_RETURN
+// GFX12: encoding: [0x7c,0xc0,0x1c,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff]
+
+global_atomic_ordered_add_b64 v[1:2], v[0:1], v[2:3], off offset:64 th:TH_ATOMIC_RETURN
+// GFX12: encoding: [0x7c,0xc0,0x1c,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00]
+
global_atomic_sub_u32 v0, v2, s[0:1] offset:-64
// GFX12: encoding: [0x00,0x80,0x0d,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vflat.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vflat.txt
index d7f9daf295845a..f4038cf10f50dc 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vflat.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vflat.txt
@@ -837,6 +837,18 @@
# GFX12: global_atomic_xor_b64 v[1:2], v[0:1], v[2:3], off offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0xc0,0x12,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00]
0x7c,0xc0,0x12,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00
+# GFX12: global_atomic_ordered_add_b64 v0, v[2:3], s[0:1] offset:64 ; encoding: [0x00,0xc0,0x1c,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00]
+0x00,0xc0,0x1c,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00
+
+# GFX12: global_atomic_ordered_add_b64 v[0:1], v[2:3], off offset:64 ; encoding: [0x7c,0xc0,0x1c,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00]
+0x7c,0xc0,0x1c,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00
+
+# GFX12: global_atomic_ordered_add_b64 v[1:2], v0, v[2:3], s[0:1] offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x00,0xc0,0x1c,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00]
+0x00,0xc0,0x1c,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00
+
+# GFX12: global_atomic_ordered_add_b64 v[1:2], v[0:1], v[2:3], off offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0xc0,0x1c,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00]
+0x7c,0xc0,0x1c,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00
+
# GFX12: global_load_addtid_b32 v1, off offset:64 ; encoding: [0x7c,0x00,0x0a,0xee,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00]
0x7c,0x00,0x0a,0xee,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00