[llvm] [AMDGPU] Support nv memory instructions modifier on gfx1250 (PR #149582)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 18 13:13:34 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-mc
Author: Stanislav Mekhanoshin (rampitec)
<details>
<summary>Changes</summary>
---
Full diff: https://github.com/llvm/llvm-project/pull/149582.diff
13 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (+25-1)
- (modified) llvm/lib/Target/AMDGPU/BUFInstructions.td (+1)
- (modified) llvm/lib/Target/AMDGPU/FLATInstructions.td (+2-1)
- (modified) llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp (+3)
- (modified) llvm/lib/Target/AMDGPU/SIDefines.h (+2)
- (modified) llvm/lib/Target/AMDGPU/SIInstrFormats.td (+1)
- (modified) llvm/lib/Target/AMDGPU/SMInstructions.td (+2-1)
- (added) llvm/test/MC/AMDGPU/gfx1250_asm_smem.s (+14)
- (added) llvm/test/MC/AMDGPU/gfx1250_asm_vbuffer_mubuf.s (+20)
- (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vflat.s (+60)
- (added) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_smem.txt (+7)
- (added) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vbuffer_mubuf.txt (+10)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vflat.txt (+30)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 43d4e8db791b0..de17fccdda902 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -5280,6 +5280,15 @@ bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
unsigned CPol = Inst.getOperand(CPolPos).getImm();
+ if (!isGFX1250()) {
+ if (CPol & CPol::NV) {
+ SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
+ StringRef CStr(S.getPointer());
+ S = SMLoc::getFromPointer(&CStr.data()[CStr.find("nv")]);
+ Error(S, "nv is not supported on this GPU");
+ }
+ }
+
if (isGFX12Plus())
return validateTHAndScopeBits(Inst, Operands, CPol);
@@ -6916,6 +6925,7 @@ ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
int64_t CPolVal = 0;
ParseStatus ResTH = ParseStatus::NoMatch;
ParseStatus ResScope = ParseStatus::NoMatch;
+ ParseStatus ResNV = ParseStatus::NoMatch;
for (;;) {
if (ResTH.isNoMatch()) {
@@ -6940,10 +6950,24 @@ ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
}
}
+ // NV bit exists on GFX12+, but does something starting from GFX1250.
+ // Allow parsing on all GFX12 and fail on validation for better
+ // diagnostics.
+ if (ResNV.isNoMatch()) {
+ if (trySkipId("nv")) {
+ ResNV = ParseStatus::Success;
+ CPolVal |= CPol::NV;
+ continue;
+ } else if (trySkipId("no", "nv")) {
+ ResNV = ParseStatus::Success;
+ continue;
+ }
+ }
+
break;
}
- if (ResTH.isNoMatch() && ResScope.isNoMatch())
+ if (ResTH.isNoMatch() && ResScope.isNoMatch() && ResNV.isNoMatch())
return ParseStatus::NoMatch;
Operands.push_back(AMDGPUOperand::CreateImm(this, CPolVal, StringLoc,
diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td
index 0caabe41e9b79..e994aeeb82251 100644
--- a/llvm/lib/Target/AMDGPU/BUFInstructions.td
+++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td
@@ -2451,6 +2451,7 @@ class VBUFFER_Real <bits<8> op, BUF_Pseudo ps, string real_name> :
let Inst{62} = ps.offen;
let Inst{63} = ps.idxen;
+ let Inst{7} = cpol{5}; // nv
let Inst{54-53} = cpol{2-1}; // th{2-1}
let Inst{52} = !if(ps.IsAtomicRet, 1, cpol{0}); // th{0}
let Inst{51-50} = cpol{4-3}; // scope
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 1432b5940f3f0..f7f29f17f9d0e 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -183,7 +183,7 @@ class VFLAT_Real <bits<8> op, FLAT_Pseudo ps, string opName = ps.Mnemonic> :
bits<7> saddr;
bits<8> vdst;
- bits<6> cpol;
+ bits<12> cpol;
bits<8> vdata; // vsrc
bits<8> vaddr;
bits<24> offset;
@@ -193,6 +193,7 @@ class VFLAT_Real <bits<8> op, FLAT_Pseudo ps, string opName = ps.Mnemonic> :
let Inst{31-26} = 0x3b;
let Inst{39-32} = !if(ps.has_vdst, vdst, ?);
let Inst{49} = ps.sve;
+ let Inst{7} = cpol{5}; // nv
let Inst{54-53} = cpol{2-1}; // th{2-1}
let Inst{52} = !if(ps.IsAtomicRet, 1, cpol{0}); // th{0}
let Inst{51-50} = cpol{4-3}; // scope
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index ec9248b972ec4..44d2f947ec9c2 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -160,6 +160,9 @@ void AMDGPUInstPrinter::printCPol(const MCInst *MI, unsigned OpNo,
printTH(MI, TH, Scope, O);
printScope(Scope, O);
+ if (Imm & CPol::NV)
+ O << " nv";
+
return;
}
diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
index a8649970aa825..edc74605ab241 100644
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -398,6 +398,8 @@ enum CPol {
SCOPE_DEV = 2 << 3,
SCOPE_SYS = 3 << 3,
+ NV = 1 << 5, // Non-volatile bit
+
SWZ = 1 << 6, // Swizzle bit
ALL = TH | SCOPE,
diff --git a/llvm/lib/Target/AMDGPU/SIInstrFormats.td b/llvm/lib/Target/AMDGPU/SIInstrFormats.td
index a368bc5d0b1a1..6b419347c01d9 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrFormats.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrFormats.td
@@ -317,6 +317,7 @@ def CPolBit {
int SLC = 1;
int DLC = 2;
int SCC = 4;
+ int NV = 5;
}
class VOPDstOperand <RegisterClass rc> : RegisterOperand <rc, "printVOPDst">;
diff --git a/llvm/lib/Target/AMDGPU/SMInstructions.td b/llvm/lib/Target/AMDGPU/SMInstructions.td
index 37dcc10086257..d8b52d271a964 100644
--- a/llvm/lib/Target/AMDGPU/SMInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SMInstructions.td
@@ -87,7 +87,7 @@ class SM_Real <SM_Pseudo ps, string opName = ps.Mnemonic>
bits<7> sdst;
bits<32> offset;
bits<8> soffset;
- bits<5> cpol;
+ bits<12> cpol;
}
class OffsetMode<bit hasOffset, bit hasSOffset, string variant,
@@ -1485,6 +1485,7 @@ class SMEM_Real_Load_gfx12<bits<6> op, string ps, string opName, OffsetMode offs
RegisterClass BaseClass = !cast<SM_Load_Pseudo>(ps # offsets.Variant).BaseClass;
let InOperandList = !con((ins BaseClass:$sbase), offsets.Ins, (ins CPol:$cpol));
+ let Inst{20} = cpol{CPolBit.NV}; // non-volatile
let Inst{22-21} = cpol{4-3}; // scope
let Inst{24-23} = cpol{1-0}; // th - only lower 2 bits are supported
}
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_smem.s b/llvm/test/MC/AMDGPU/gfx1250_asm_smem.s
new file mode 100644
index 0000000000000..899c4c7aca0ba
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_smem.s
@@ -0,0 +1,14 @@
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s | FileCheck --check-prefix=GFX1250 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s
+
+s_load_b32 s4, s[2:3], 10 nv
+// GFX1250: s_load_b32 s4, s[2:3], 0xa nv ; encoding: [0x01,0x01,0x10,0xf4,0x0a,0x00,0x00,0xf8]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
+// GFX12-ERR-NEXT:{{^}}s_load_b32 s4, s[2:3], 10 nv
+// GFX12-ERR-NEXT:{{^}} ^
+
+s_buffer_load_i8 s5, s[4:7], s0 nv
+// GFX1250: s_buffer_load_i8 s5, s[4:7], s0 offset:0x0 nv ; encoding: [0x42,0x01,0x13,0xf4,0x00,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
+// GFX12-ERR-NEXT:{{^}}s_buffer_load_i8 s5, s[4:7], s0 nv
+// GFX12-ERR-NEXT:{{^}} ^
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vbuffer_mubuf.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vbuffer_mubuf.s
new file mode 100644
index 0000000000000..1d14bd91a7569
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vbuffer_mubuf.s
@@ -0,0 +1,20 @@
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s | FileCheck --check-prefix=GFX1250 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s
+
+buffer_load_b32 v5, v1, s[8:11], s3 offen offset:4095 nv
+// GFX1250: buffer_load_b32 v5, v1, s[8:11], s3 offen offset:4095 nv ; encoding: [0x83,0x00,0x05,0xc4,0x05,0x10,0x80,0x40,0x01,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
+// GFX12-ERR-NEXT:{{^}}buffer_load_b32 v5, v1, s[8:11], s3 offen offset:4095 nv
+// GFX12-ERR-NEXT:{{^}} ^
+
+buffer_store_b128 v[2:5], v0, s[12:15], s4 idxen offset:4095 nv
+// GFX1250: buffer_store_b128 v[2:5], v0, s[12:15], s4 idxen offset:4095 nv ; encoding: [0x84,0x40,0x07,0xc4,0x02,0x18,0x80,0x80,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
+// GFX12-ERR-NEXT:{{^}}buffer_store_b128 v[2:5], v0, s[12:15], s4 idxen offset:4095 nv
+// GFX12-ERR-NEXT:{{^}} ^
+
+buffer_atomic_and_b32 v5, v1, s[8:11], s3 offen offset:4095 nv
+// GFX1250: buffer_atomic_and_b32 v5, v1, s[8:11], s3 offen offset:4095 nv ; encoding: [0x83,0x00,0x0f,0xc4,0x05,0x10,0x80,0x40,0x01,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
+// GFX12-ERR-NEXT:{{^}}buffer_atomic_and_b32 v5, v1, s[8:11], s3 offen offset:4095 nv
+// GFX12-ERR-NEXT:{{^}} ^
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vflat.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vflat.s
index 737d7b3de4e92..488040e1b5390 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vflat.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vflat.s
@@ -1,6 +1,66 @@
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s | FileCheck --check-prefix=GFX1250 %s
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s
+global_load_b32 v0, v[2:3], off nv
+// GFX1250: global_load_b32 v0, v[2:3], off nv ; encoding: [0xfc,0x00,0x05,0xee,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
+// GFX12-ERR-NEXT:{{^}}global_load_b32 v0, v[2:3], off nv
+// GFX12-ERR-NEXT:{{^}} ^
+
+global_store_b32 v[2:3], v0, off nv
+// GFX1250: global_store_b32 v[2:3], v0, off nv ; encoding: [0xfc,0x80,0x06,0xee,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
+// GFX12-ERR-NEXT:{{^}}global_store_b32 v[2:3], v0, off nv
+// GFX12-ERR-NEXT:{{^}} ^
+
+global_atomic_add v[2:3], v2, off nv
+// GFX1250: global_atomic_add_u32 v[2:3], v2, off nv ; encoding: [0xfc,0x40,0x0d,0xee,0x00,0x00,0x00,0x01,0x02,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
+// GFX12-ERR-NEXT:{{^}}global_atomic_add v[2:3], v2, off nv
+// GFX12-ERR-NEXT:{{^}} ^
+
+global_load_addtid_b32 v5, s[2:3] nv
+// GFX1250: global_load_addtid_b32 v5, s[2:3] nv ; encoding: [0x82,0x00,0x0a,0xee,0x05,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
+// GFX12-ERR-NEXT:{{^}}global_load_addtid_b32 v5, s[2:3] nv
+// GFX12-ERR-NEXT:{{^}} ^
+
+scratch_load_b32 v0, v2, off nv
+// GFX1250: scratch_load_b32 v0, v2, off nv ; encoding: [0xfc,0x00,0x05,0xed,0x00,0x00,0x02,0x00,0x02,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
+// GFX12-ERR-NEXT:{{^}}scratch_load_b32 v0, v2, off nv
+// GFX12-ERR-NEXT:{{^}} ^
+
+scratch_store_b32 v2, v0, off nv
+// GFX1250: scratch_store_b32 v2, v0, off nv ; encoding: [0xfc,0x80,0x06,0xed,0x00,0x00,0x02,0x00,0x02,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
+// GFX12-ERR-NEXT:{{^}}scratch_store_b32 v2, v0, off nv
+// GFX12-ERR-NEXT:{{^}} ^
+
+flat_load_b32 v0, v[2:3] nv
+// GFX1250: flat_load_b32 v0, v[2:3] nv ; encoding: [0xfc,0x00,0x05,0xec,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
+// GFX12-ERR-NEXT:{{^}}flat_load_b32 v0, v[2:3] nv
+// GFX12-ERR-NEXT:{{^}} ^
+
+flat_store_b32 v[2:3], v0 nv
+// GFX1250: flat_store_b32 v[2:3], v0 nv ; encoding: [0xfc,0x80,0x06,0xec,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
+// GFX12-ERR-NEXT:{{^}}flat_store_b32 v[2:3], v0 nv
+// GFX12-ERR-NEXT:{{^}} ^
+
+flat_atomic_add v[2:3], v2 nv
+// GFX1250: flat_atomic_add_u32 v[2:3], v2 nv ; encoding: [0xfc,0x40,0x0d,0xec,0x00,0x00,0x00,0x01,0x02,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
+// GFX12-ERR-NEXT:{{^}}flat_atomic_add v[2:3], v2 nv
+// GFX12-ERR-NEXT:{{^}} ^
+
+scratch_load_b32 v5, v2, off nv
+// GFX1250: scratch_load_b32 v5, v2, off nv ; encoding: [0xfc,0x00,0x05,0xed,0x05,0x00,0x02,0x00,0x02,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
+// GFX12-ERR-NEXT:{{^}}scratch_load_b32 v5, v2, off nv
+// GFX12-ERR-NEXT:{{^}} ^
+
tensor_save s[0:1]
// GFX1250: tensor_save s[0:1] ; encoding: [0x00,0x80,0x1b,0xee,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_smem.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_smem.txt
new file mode 100644
index 0000000000000..4bd9ab45af6bd
--- /dev/null
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_smem.txt
@@ -0,0 +1,7 @@
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250 %s
+
+# GFX1250: s_buffer_load_i8 s5, s[4:7], s0 offset:0x0 nv ; encoding: [0x42,0x01,0x13,0xf4,0x00,0x00,0x00,0x00]
+0x42,0x01,0x13,0xf4,0x00,0x00,0x00,0x00
+
+# GFX1250: s_load_b32 s4, s[2:3], 0xa nv ; encoding: [0x01,0x01,0x10,0xf4,0x0a,0x00,0x00,0xf8]
+0x01,0x01,0x10,0xf4,0x0a,0x00,0x00,0xf8
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vbuffer_mubuf.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vbuffer_mubuf.txt
new file mode 100644
index 0000000000000..a2f12115bb64b
--- /dev/null
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vbuffer_mubuf.txt
@@ -0,0 +1,10 @@
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250 %s
+
+# GFX1250: buffer_atomic_and_b32 v5, v1, s[8:11], s3 offen offset:4095 nv ; encoding: [0x83,0x00,0x0f,0xc4,0x05,0x10,0x80,0x40,0x01,0xff,0x0f,0x00]
+0x83,0x00,0x0f,0xc4,0x05,0x10,0x80,0x40,0x01,0xff,0x0f,0x00
+
+# GFX1250: buffer_load_b32 v5, v1, s[8:11], s3 offen offset:4095 nv ; encoding: [0x83,0x00,0x05,0xc4,0x05,0x10,0x80,0x40,0x01,0xff,0x0f,0x00]
+0x83,0x00,0x05,0xc4,0x05,0x10,0x80,0x40,0x01,0xff,0x0f,0x00
+
+# GFX1250: buffer_store_b128 v[2:5], v0, s[12:15], s4 idxen offset:4095 nv ; encoding: [0x84,0x40,0x07,0xc4,0x02,0x18,0x80,0x80,0x00,0xff,0x0f,0x00]
+0x84,0x40,0x07,0xc4,0x02,0x18,0x80,0x80,0x00,0xff,0x0f,0x00
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vflat.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vflat.txt
index 55bc3e7a5746c..fcbb58bf7865e 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vflat.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vflat.txt
@@ -2826,6 +2826,36 @@
# GFX1250: scratch_store_d16_hi_b8 v1, v2, s3 ; encoding: [0x03,0x00,0x09,0xed,0x00,0x00,0x02,0x01,0x01,0x00,0x00,0x00]
0x03,0x00,0x09,0xed,0x00,0x00,0x02,0x01,0x01,0x00,0x00,0x00
+# GFX1250: flat_atomic_add_u32 v[2:3], v2 nv ; encoding: [0xfc,0x40,0x0d,0xec,0x00,0x00,0x00,0x01,0x02,0x00,0x00,0x00]
+0xfc,0x40,0x0d,0xec,0x00,0x00,0x00,0x01,0x02,0x00,0x00,0x00
+
+# GFX1250: flat_load_b32 v0, v[2:3] nv ; encoding: [0xfc,0x00,0x05,0xec,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00]
+0xfc,0x00,0x05,0xec,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00
+
+# GFX1250: flat_store_b32 v[2:3], v0 nv ; encoding: [0xfc,0x80,0x06,0xec,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00]
+0xfc,0x80,0x06,0xec,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00
+
+# GFX1250: global_atomic_add_u32 v[2:3], v2, off nv ; encoding: [0xfc,0x40,0x0d,0xee,0x00,0x00,0x00,0x01,0x02,0x00,0x00,0x00]
+0xfc,0x40,0x0d,0xee,0x00,0x00,0x00,0x01,0x02,0x00,0x00,0x00
+
+# GFX1250: global_load_addtid_b32 v5, s[2:3] nv ; encoding: [0x82,0x00,0x0a,0xee,0x05,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
+0x82,0x00,0x0a,0xee,0x05,0x00,0x00,0x00,0x00,0x00,0x00,0x00
+
+# GFX1250: global_load_b32 v0, v[2:3], off nv ; encoding: [0xfc,0x00,0x05,0xee,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00]
+0xfc,0x00,0x05,0xee,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00
+
+# GFX1250: global_store_b32 v[2:3], v0, off nv ; encoding: [0xfc,0x80,0x06,0xee,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00]
+0xfc,0x80,0x06,0xee,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00
+
+# GFX1250: scratch_load_b32 v0, v2, off nv ; encoding: [0xfc,0x00,0x05,0xed,0x00,0x00,0x02,0x00,0x02,0x00,0x00,0x00]
+0xfc,0x00,0x05,0xed,0x00,0x00,0x02,0x00,0x02,0x00,0x00,0x00
+
+# GFX1250: scratch_store_b32 v2, v0, off nv ; encoding: [0xfc,0x80,0x06,0xed,0x00,0x00,0x02,0x00,0x02,0x00,0x00,0x00]
+0xfc,0x80,0x06,0xed,0x00,0x00,0x02,0x00,0x02,0x00,0x00,0x00
+
+# GFX1250: scratch_load_b32 v5, v2, off nv ; encoding: [0xfc,0x00,0x05,0xed,0x05,0x00,0x02,0x00,0x02,0x00,0x00,0x00]
+0xfc,0x00,0x05,0xed,0x05,0x00,0x02,0x00,0x02,0x00,0x00,0x00
+
# GFX1250: tensor_save s[0:1] ; encoding: [0x00,0x80,0x1b,0xee,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
0x00,0x80,0x1b,0xee,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
``````````
</details>
https://github.com/llvm/llvm-project/pull/149582
More information about the llvm-commits
mailing list