[llvm-branch-commits] [llvm] AMDGPU: Handle gfx950 global_load_lds_* instructions (PR #116680)
Matt Arsenault via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Nov 18 11:02:08 PST 2024
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/116680
Define global_load_lds_dwordx3 and global_load_dwordx4.
Oddly it seems dwordx2 was skipped.
>From 42f311ceb555ea2b3f171ad2ef8254e971e0be12 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Thu, 18 Jan 2024 14:44:03 +0700
Subject: [PATCH] AMDGPU: Handle gfx950 global_load_lds_* instructions
Define global_load_lds_dwordx3 and global_load_dwordx4.
Oddly it seems dwordx2 was skipped.
---
llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 2 +-
.../AMDGPU/AMDGPUInstructionSelector.cpp | 10 ++
llvm/lib/Target/AMDGPU/FLATInstructions.td | 9 ++
llvm/lib/Target/AMDGPU/GCNSubtarget.h | 7 +
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 10 ++
.../llvm.amdgcn.global.load.lds.gfx950.ll | 137 ++++++++++++++++++
llvm/test/MC/AMDGPU/gfx950_asm_features.s | 37 +++++
llvm/test/MC/Disassembler/AMDGPU/gfx950.txt | 25 ++++
8 files changed, 236 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.lds.gfx950.ll
create mode 100644 llvm/test/MC/AMDGPU/gfx950_asm_features.s
create mode 100644 llvm/test/MC/Disassembler/AMDGPU/gfx950.txt
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 15f33cdbf92e6e..f43ab50d2ea441 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -2452,7 +2452,7 @@ class AMDGPUGlobalLoadLDS :
[],
[LLVMQualPointerType<1>, // Base global pointer to load from
LLVMQualPointerType<3>, // LDS base pointer to store to
- llvm_i32_ty, // Data byte size: 1/2/4
+ llvm_i32_ty, // Data byte size: 1/2/4 (/12/16 for gfx950)
llvm_i32_ty, // imm offset (applied to both global and LDS address)
llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = sc0,
// bit 1 = sc1,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 13de93e829fab2..a6ef0069f134bd 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -3329,6 +3329,16 @@ bool AMDGPUInstructionSelector::selectGlobalLoadLds(MachineInstr &MI) const{
case 4:
Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORD;
break;
+ case 12:
+ if (!Subtarget->hasLDSLoadB96_B128())
+ return false;
+ Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORDX3;
+ break;
+ case 16:
+ if (!Subtarget->hasLDSLoadB96_B128())
+ return false;
+ Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORDX4;
+ break;
}
MachineBasicBlock *MBB = MI.getParent();
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index db74372e9db452..861fcf017d9e4d 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -934,6 +934,11 @@ defm GLOBAL_LOAD_LDS_USHORT : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_usho
defm GLOBAL_LOAD_LDS_SSHORT : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_sshort">;
defm GLOBAL_LOAD_LDS_DWORD : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_dword">;
+let SubtargetPredicate = HasGFX950Insts in {
+defm GLOBAL_LOAD_LDS_DWORDX3 : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_dwordx3">;
+defm GLOBAL_LOAD_LDS_DWORDX4 : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_dwordx4">;
+}
+
let SubtargetPredicate = isGFX12Plus in {
defm GLOBAL_ATOMIC_COND_SUB_U32 : FLAT_Global_Atomic_Pseudo <"global_atomic_cond_sub_u32", VGPR_32, i32>;
defm GLOBAL_ATOMIC_ORDERED_ADD_B64 : FLAT_Global_Atomic_Pseudo <"global_atomic_ordered_add_b64", VReg_64, i64>;
@@ -1980,6 +1985,10 @@ defm GLOBAL_LOAD_LDS_USHORT : FLAT_Real_AllAddr_LDS <0x028, 0x12>;
defm GLOBAL_LOAD_LDS_SSHORT : FLAT_Real_AllAddr_LDS <0x029, 0x13>;
defm GLOBAL_LOAD_LDS_DWORD : FLAT_Real_AllAddr_LDS <0x02a, 0x14>;
+defm GLOBAL_LOAD_LDS_DWORDX3 : FLAT_Real_AllAddr_LDS <0x07e, 0x07e>;
+defm GLOBAL_LOAD_LDS_DWORDX4 : FLAT_Real_AllAddr_LDS <0x07d, 0x07d>;
+
+
defm GLOBAL_ATOMIC_SWAP : FLAT_Global_Real_Atomics_vi <0x40>;
defm GLOBAL_ATOMIC_CMPSWAP : FLAT_Global_Real_Atomics_vi <0x41>;
defm GLOBAL_ATOMIC_ADD : FLAT_Global_Real_Atomics_vi <0x42>;
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 4a6efe533230b1..f3f96940c1f44b 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -1289,6 +1289,13 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
// hasGFX940Insts and hasGFX90AInsts are also true.
bool hasGFX950Insts() const { return GFX950Insts; }
+ /// Returns true if the target supports
+ /// global_load_lds_dwordx3/global_load_lds_dwordx4 or
+ /// buffer_load_dwordx3/buffer_load_dwordx4 with the lds bit.
+ bool hasLDSLoadB96_B128() const {
+ return hasGFX950Insts();
+ }
+
bool hasSALUFloatInsts() const { return HasSALUFloatInsts; }
bool hasPseudoScalarTrans() const { return HasPseudoScalarTrans; }
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index ad89812558d25c..0f7764906527d0 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -9894,6 +9894,16 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
case 4:
Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORD;
break;
+ case 12:
+ if (!Subtarget->hasLDSLoadB96_B128())
+ return SDValue();
+ Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORDX3;
+ break;
+ case 16:
+ if (!Subtarget->hasLDSLoadB96_B128())
+ return SDValue();
+ Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORDX4;
+ break;
}
auto *M = cast<MemSDNode>(Op);
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.lds.gfx950.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.lds.gfx950.ll
new file mode 100644
index 00000000000000..b7819ea0431588
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.lds.gfx950.ll
@@ -0,0 +1,137 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX950,GFX950-SDAG %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX950,GFX950-GISEL %s
+
+declare void @llvm.amdgcn.global.load.lds(ptr addrspace(1) nocapture %gptr, ptr addrspace(3) nocapture %lptr, i32 %size, i32 %offset, i32 %aux)
+
+;---------------------------------------------------------------------y
+; dwordx3
+;---------------------------------------------------------------------
+
+define amdgpu_ps void @global_load_lds_dwordx3_vaddr(ptr addrspace(1) nocapture %gptr, ptr addrspace(3) nocapture %lptr) {
+; GFX950-SDAG-LABEL: global_load_lds_dwordx3_vaddr:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: v_readfirstlane_b32 s0, v2
+; GFX950-SDAG-NEXT: s_mov_b32 m0, s0
+; GFX950-SDAG-NEXT: s_nop 0
+; GFX950-SDAG-NEXT: global_load_lds_dwordx3 v[0:1], off offset:16 sc0
+; GFX950-SDAG-NEXT: s_endpgm
+;
+; GFX950-GISEL-LABEL: global_load_lds_dwordx3_vaddr:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: v_readfirstlane_b32 m0, v2
+; GFX950-GISEL-NEXT: s_nop 4
+; GFX950-GISEL-NEXT: global_load_lds_dwordx3 v[0:1], off offset:16 sc0
+; GFX950-GISEL-NEXT: s_endpgm
+ call void @llvm.amdgcn.global.load.lds(ptr addrspace(1) %gptr, ptr addrspace(3) %lptr, i32 12, i32 16, i32 1)
+ ret void
+}
+
+define amdgpu_ps void @global_load_lds_dwordx3_saddr(ptr addrspace(1) nocapture inreg %gptr, ptr addrspace(3) nocapture %lptr) {
+; GFX950-SDAG-LABEL: global_load_lds_dwordx3_saddr:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: v_readfirstlane_b32 s2, v0
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX950-SDAG-NEXT: s_mov_b32 m0, s2
+; GFX950-SDAG-NEXT: s_nop 0
+; GFX950-SDAG-NEXT: global_load_lds_dwordx3 v1, s[0:1] offset:32 nt
+; GFX950-SDAG-NEXT: s_endpgm
+;
+; GFX950-GISEL-LABEL: global_load_lds_dwordx3_saddr:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: v_readfirstlane_b32 m0, v0
+; GFX950-GISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX950-GISEL-NEXT: s_nop 3
+; GFX950-GISEL-NEXT: global_load_lds_dwordx3 v0, s[0:1] offset:32 nt
+; GFX950-GISEL-NEXT: s_endpgm
+ call void @llvm.amdgcn.global.load.lds(ptr addrspace(1) %gptr, ptr addrspace(3) %lptr, i32 12, i32 32, i32 2)
+ ret void
+}
+
+define amdgpu_ps void @global_load_lds_dwordx3_saddr_and_vaddr(ptr addrspace(1) nocapture inreg %gptr, ptr addrspace(3) nocapture %lptr, i32 %voffset) {
+; GFX950-SDAG-LABEL: global_load_lds_dwordx3_saddr_and_vaddr:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: v_readfirstlane_b32 s2, v0
+; GFX950-SDAG-NEXT: s_mov_b32 m0, s2
+; GFX950-SDAG-NEXT: s_nop 0
+; GFX950-SDAG-NEXT: global_load_lds_dwordx3 v1, s[0:1] offset:48 sc1
+; GFX950-SDAG-NEXT: s_endpgm
+;
+; GFX950-GISEL-LABEL: global_load_lds_dwordx3_saddr_and_vaddr:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: v_readfirstlane_b32 m0, v0
+; GFX950-GISEL-NEXT: s_nop 4
+; GFX950-GISEL-NEXT: global_load_lds_dwordx3 v1, s[0:1] offset:48 sc1
+; GFX950-GISEL-NEXT: s_endpgm
+ %voffset.64 = zext i32 %voffset to i64
+ %gep = getelementptr i8, ptr addrspace(1) %gptr, i64 %voffset.64
+ call void @llvm.amdgcn.global.load.lds(ptr addrspace(1) %gep, ptr addrspace(3) %lptr, i32 12, i32 48, i32 16)
+ ret void
+}
+
+;---------------------------------------------------------------------
+; dwordx4
+;---------------------------------------------------------------------
+
+define amdgpu_ps void @global_load_lds_dwordx4_vaddr(ptr addrspace(1) nocapture %gptr, ptr addrspace(3) nocapture %lptr) {
+; GFX950-SDAG-LABEL: global_load_lds_dwordx4_vaddr:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: v_readfirstlane_b32 s0, v2
+; GFX950-SDAG-NEXT: s_mov_b32 m0, s0
+; GFX950-SDAG-NEXT: s_nop 0
+; GFX950-SDAG-NEXT: global_load_lds_dwordx4 v[0:1], off offset:16 sc0
+; GFX950-SDAG-NEXT: s_endpgm
+;
+; GFX950-GISEL-LABEL: global_load_lds_dwordx4_vaddr:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: v_readfirstlane_b32 m0, v2
+; GFX950-GISEL-NEXT: s_nop 4
+; GFX950-GISEL-NEXT: global_load_lds_dwordx4 v[0:1], off offset:16 sc0
+; GFX950-GISEL-NEXT: s_endpgm
+ call void @llvm.amdgcn.global.load.lds(ptr addrspace(1) %gptr, ptr addrspace(3) %lptr, i32 16, i32 16, i32 1)
+ ret void
+}
+
+define amdgpu_ps void @global_load_lds_dwordx4_saddr(ptr addrspace(1) nocapture inreg %gptr, ptr addrspace(3) nocapture %lptr) {
+; GFX950-SDAG-LABEL: global_load_lds_dwordx4_saddr:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: v_readfirstlane_b32 s2, v0
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX950-SDAG-NEXT: s_mov_b32 m0, s2
+; GFX950-SDAG-NEXT: s_nop 0
+; GFX950-SDAG-NEXT: global_load_lds_dwordx4 v1, s[0:1] offset:32 nt
+; GFX950-SDAG-NEXT: s_endpgm
+;
+; GFX950-GISEL-LABEL: global_load_lds_dwordx4_saddr:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: v_readfirstlane_b32 m0, v0
+; GFX950-GISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX950-GISEL-NEXT: s_nop 3
+; GFX950-GISEL-NEXT: global_load_lds_dwordx4 v0, s[0:1] offset:32 nt
+; GFX950-GISEL-NEXT: s_endpgm
+ call void @llvm.amdgcn.global.load.lds(ptr addrspace(1) %gptr, ptr addrspace(3) %lptr, i32 16, i32 32, i32 2)
+ ret void
+}
+
+define amdgpu_ps void @global_load_lds_dwordx4_saddr_and_vaddr(ptr addrspace(1) nocapture inreg %gptr, ptr addrspace(3) nocapture %lptr, i32 %voffset) {
+; GFX950-SDAG-LABEL: global_load_lds_dwordx4_saddr_and_vaddr:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: v_readfirstlane_b32 s2, v0
+; GFX950-SDAG-NEXT: s_mov_b32 m0, s2
+; GFX950-SDAG-NEXT: s_nop 0
+; GFX950-SDAG-NEXT: global_load_lds_dwordx4 v1, s[0:1] offset:48 sc1
+; GFX950-SDAG-NEXT: s_endpgm
+;
+; GFX950-GISEL-LABEL: global_load_lds_dwordx4_saddr_and_vaddr:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: v_readfirstlane_b32 m0, v0
+; GFX950-GISEL-NEXT: s_nop 4
+; GFX950-GISEL-NEXT: global_load_lds_dwordx4 v1, s[0:1] offset:48 sc1
+; GFX950-GISEL-NEXT: s_endpgm
+ %voffset.64 = zext i32 %voffset to i64
+ %gep = getelementptr i8, ptr addrspace(1) %gptr, i64 %voffset.64
+ call void @llvm.amdgcn.global.load.lds(ptr addrspace(1) %gep, ptr addrspace(3) %lptr, i32 16, i32 48, i32 16)
+ ret void
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX950: {{.*}}
diff --git a/llvm/test/MC/AMDGPU/gfx950_asm_features.s b/llvm/test/MC/AMDGPU/gfx950_asm_features.s
new file mode 100644
index 00000000000000..405d152c93d867
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx950_asm_features.s
@@ -0,0 +1,37 @@
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx950 -show-encoding %s | FileCheck --check-prefix=GFX950 --strict-whitespace %s
+// xUN: not llvm-mc -triple=amdgcn -mcpu=gfx940 %s 2>&1 | FileCheck --check-prefixes=NOT-GFX950,GFX940 --implicit-check-not=error: %s
+// xUN: not llvm-mc -triple=amdgcn -mcpu=gfx90a %s 2>&1 | FileCheck --check-prefixes=NOT-GFX950,GFX90A --implicit-check-not=error: %s
+// xUN: not llvm-mc -triple=amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck --check-prefixes=NOT-GFX950,GFX10 --implicit-check-not=error: %s
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX950: global_load_lds_dwordx3 v[2:3], off ; encoding: [0x00,0x80,0xf8,0xdd,0x02,0x00,0x7f,0x00]
+
+global_load_lds_dwordx3 v[2:3], off
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: global_load_lds_dwordx3 v[2:3], off sc0 nt sc1 ; encoding: [0x00,0x80,0xfb,0xdf,0x02,0x00,0x7f,0x00]
+global_load_lds_dwordx3 v[2:3], off sc0 nt sc1
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: global_load_lds_dwordx3 v[2:3], off offset:4 ; encoding: [0x04,0x80,0xf8,0xdd,0x02,0x00,0x7f,0x00]
+global_load_lds_dwordx3 v[2:3], off offset:4
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: global_load_lds_dwordx3 v2, s[4:5] offset:4 ; encoding: [0x04,0x80,0xf8,0xdd,0x02,0x00,0x04,0x00]
+global_load_lds_dwordx3 v2, s[4:5] offset:4
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX950: global_load_lds_dwordx4 v[2:3], off ; encoding: [0x00,0x80,0xf4,0xdd,0x02,0x00,0x7f,0x00]
+global_load_lds_dwordx4 v[2:3], off
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: global_load_lds_dwordx4 v[2:3], off sc0 nt sc1 ; encoding: [0x00,0x80,0xf7,0xdf,0x02,0x00,0x7f,0x00]
+global_load_lds_dwordx4 v[2:3], off sc0 nt sc1
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: global_load_lds_dwordx4 v[2:3], off offset:4 ; encoding: [0x04,0x80,0xf4,0xdd,0x02,0x00,0x7f,0x00]
+global_load_lds_dwordx4 v[2:3], off offset:4
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: global_load_lds_dwordx4 v2, s[4:5] offset:4 ; encoding: [0x04,0x80,0xf4,0xdd,0x02,0x00,0x04,0x00]
+global_load_lds_dwordx4 v2, s[4:5] offset:4
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx950.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx950.txt
new file mode 100644
index 00000000000000..a9f28332860ee5
--- /dev/null
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx950.txt
@@ -0,0 +1,25 @@
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx950 -show-encoding -disassemble %s | FileCheck -check-prefix=GFX950 %s
+
+# GFX950: global_load_lds_dwordx3 v2, s[4:5] offset:4 ; encoding: [0x04,0x80,0xf8,0xdd,0x02,0x00,0x04,0x00]
+0x04,0x80,0xf8,0xdd,0x02,0x00,0x04,0x00
+
+# GFX950: global_load_lds_dwordx3 v[2:3], off ; encoding: [0x00,0x80,0xf8,0xdd,0x02,0x00,0x7f,0x00]
+0x00,0x80,0xf8,0xdd,0x02,0x00,0x7f,0x00
+
+# GFX950: global_load_lds_dwordx3 v[2:3], off offset:4 ; encoding: [0x04,0x80,0xf8,0xdd,0x02,0x00,0x7f,0x00]
+0x04,0x80,0xf8,0xdd,0x02,0x00,0x7f,0x00
+
+# GFX950: global_load_lds_dwordx3 v[2:3], off sc0 nt sc1 ; encoding: [0x00,0x80,0xfb,0xdf,0x02,0x00,0x7f,0x00]
+0x00,0x80,0xfb,0xdf,0x02,0x00,0x7f,0x00
+
+# GFX950: global_load_lds_dwordx4 v2, s[4:5] offset:4 ; encoding: [0x04,0x80,0xf4,0xdd,0x02,0x00,0x04,0x00]
+0x04,0x80,0xf4,0xdd,0x02,0x00,0x04,0x00
+
+# GFX950: global_load_lds_dwordx4 v[2:3], off ; encoding: [0x00,0x80,0xf4,0xdd,0x02,0x00,0x7f,0x00]
+0x00,0x80,0xf4,0xdd,0x02,0x00,0x7f,0x00
+
+# GFX950: global_load_lds_dwordx4 v[2:3], off offset:4 ; encoding: [0x04,0x80,0xf4,0xdd,0x02,0x00,0x7f,0x00]
+0x04,0x80,0xf4,0xdd,0x02,0x00,0x7f,0x00
+
+# GFX950: global_load_lds_dwordx4 v[2:3], off sc0 nt sc1 ; encoding: [0x00,0x80,0xf7,0xdf,0x02,0x00,0x7f,0x00]
+0x00,0x80,0xf7,0xdf,0x02,0x00,0x7f,0x00
More information about the llvm-branch-commits
mailing list