[llvm-branch-commits] [llvm] [AMDGPU] Record old VGPR MSBs in the high bits of s_set_vgpr_msb (PR #165035)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Oct 24 13:09:37 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Stanislav Mekhanoshin (rampitec)
<details>
<summary>Changes</summary>
Fixes: SWDEV-562450
---
Patch is 44.31 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/165035.diff
6 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp (+13-3)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp (+1-1)
- (modified) llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250-t16.mir (+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir (+68-67)
- (modified) llvm/test/CodeGen/AMDGPU/whole-wave-functions.ll (+18-18)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp
index 0be1dd0817605..f9f0bc619d9f7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp
@@ -58,6 +58,8 @@ class AMDGPULowerVGPREncoding {
static constexpr unsigned BitsPerField = 2;
static constexpr unsigned NumFields = 4;
static constexpr unsigned FieldMask = (1 << BitsPerField) - 1;
+ static constexpr unsigned ModeWidth = NumFields * BitsPerField;
+ static constexpr unsigned ModeMask = (1 << ModeWidth) - 1;
using ModeType = PackedVector<unsigned, BitsPerField,
std::bitset<BitsPerField * NumFields>>;
@@ -152,13 +154,21 @@ bool AMDGPULowerVGPREncoding::setMode(ModeTy NewMode, ModeTy Mask,
CurrentMode |= NewMode;
CurrentMask |= Mask;
- MostRecentModeSet->getOperand(0).setImm(CurrentMode);
+ MachineOperand &Op = MostRecentModeSet->getOperand(0);
+
+ // Carry old mode bits from the existing instruction.
+ int64_t OldModeBits = Op.getImm() & (ModeMask << ModeWidth);
+
+ Op.setImm(CurrentMode | OldModeBits);
return true;
}
+ // Record previous mode into high 8 bits of the immediate.
+ int64_t OldModeBits = CurrentMode << ModeWidth;
+
I = handleClause(I);
- MostRecentModeSet =
- BuildMI(*MBB, I, {}, TII->get(AMDGPU::S_SET_VGPR_MSB)).addImm(NewMode);
+ MostRecentModeSet = BuildMI(*MBB, I, {}, TII->get(AMDGPU::S_SET_VGPR_MSB))
+ .addImm(NewMode | OldModeBits);
CurrentMode = NewMode;
CurrentMask = Mask;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
index 680e7eb3de6be..844649ebb9ae6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
@@ -412,7 +412,7 @@ void AMDGPUAsmPrinter::emitInstruction(const MachineInstr *MI) {
*OutStreamer);
if (isVerbose() && MI->getOpcode() == AMDGPU::S_SET_VGPR_MSB) {
- unsigned V = MI->getOperand(0).getImm();
+ unsigned V = MI->getOperand(0).getImm() & 0xff;
OutStreamer->AddComment(
" msbs: dst=" + Twine(V >> 6) + " src0=" + Twine(V & 3) +
" src1=" + Twine((V >> 2) & 3) + " src2=" + Twine((V >> 4) & 3));
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
index 013cfeb364048..28b4da8ab9ebb 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
@@ -168,7 +168,7 @@ bool AMDGPUMCInstrAnalysis::evaluateBranch(const MCInst &Inst, uint64_t Addr,
void AMDGPUMCInstrAnalysis::updateState(const MCInst &Inst, uint64_t Addr) {
if (Inst.getOpcode() == AMDGPU::S_SET_VGPR_MSB_gfx12)
- VgprMSBs = Inst.getOperand(0).getImm();
+ VgprMSBs = Inst.getOperand(0).getImm() & 0xff;
else if (isTerminator(Inst))
VgprMSBs = 0;
}
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250-t16.mir b/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250-t16.mir
index 8a70a8acd28d3..32cc398740d62 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250-t16.mir
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250-t16.mir
@@ -36,7 +36,7 @@ body: |
; GCN-NEXT: v_add_f16_e64 v128.l /*v384.l*/, v129.l /*v385.l*/, v130.l /*v386.l*/
$vgpr384_lo16 = V_ADD_F16_t16_e64 0, undef $vgpr385_lo16, 0, undef $vgpr386_lo16, 0, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: s_set_vgpr_msb 0x8a
+ ; GCN-NEXT: s_set_vgpr_msb 0x458a
; ASM-SAME: ; msbs: dst=2 src0=2 src1=2 src2=0
; GCN-NEXT: v_add_f16_e64 v0.h /*v512.h*/, v1.h /*v513.h*/, v2.h /*v514.h*/
$vgpr512_hi16 = V_ADD_F16_t16_e64 0, undef $vgpr513_hi16, 0, undef $vgpr514_hi16, 0, 0, 0, implicit $exec, implicit $mode
@@ -50,7 +50,7 @@ body: |
; GCN-NEXT: v_add_f16_e64 v128.l /*v640.l*/, v129.l /*v641.l*/, v130.l /*v642.l*/
$vgpr640_lo16 = V_ADD_F16_t16_e64 0, undef $vgpr641_lo16, 0, undef $vgpr642_lo16, 0, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: s_set_vgpr_msb 0xcf
+ ; GCN-NEXT: s_set_vgpr_msb 0x8acf
; ASM-SAME: ; msbs: dst=3 src0=3 src1=3 src2=0
; GCN-NEXT: v_add_f16_e64 v0.h /*v768.h*/, v1.h /*v769.h*/, v2.h /*v770.h*/
$vgpr768_hi16 = V_ADD_F16_t16_e64 0, undef $vgpr769_hi16, 0, undef $vgpr770_hi16, 0, 0, 0, implicit $exec, implicit $mode
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir b/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir
index 2f53c6229ed09..b084ad63a4d5c 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir
@@ -22,13 +22,13 @@ body: |
$vgpr257 = V_MOV_B32_e32 undef $vgpr510, implicit $exec
; Single bit change
- ; GCN-NEXT: s_set_vgpr_msb 1
+ ; GCN-NEXT: s_set_vgpr_msb 0x4101
; ASM-SAME: ; msbs: dst=0 src0=1 src1=0 src2=0
; GCN-NEXT: v_rcp_f32_e64 v255, v2 /*v258*/
$vgpr255 = V_RCP_F32_e64 0, undef $vgpr258, 0, 0, implicit $exec, implicit $mode
; Reset
- ; GCN-NEXT: s_set_vgpr_msb 0
+ ; GCN-NEXT: s_set_vgpr_msb 0x100
; ASM-SAME: ; msbs: dst=0 src0=0 src1=0 src2=0
; GCN-NEXT: v_rcp_f32_e64 v255, v1
$vgpr255 = V_RCP_F32_e64 0, undef $vgpr1, 0, 0, implicit $exec, implicit $mode
@@ -40,7 +40,7 @@ body: |
; GCN-NEXT: v_add_nc_u32_e32 v0, v253 /*v509*/, v252 /*v508*/
$vgpr0 = V_ADD_U32_e32 undef $vgpr509, undef $vgpr508, implicit $exec
- ; GCN-NEXT: s_set_vgpr_msb 0x44
+ ; GCN-NEXT: s_set_vgpr_msb 0x544
; ASM-SAME: ; msbs: dst=1 src0=0 src1=1 src2=0
; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GCN-NEXT: v_add_f32_e64 v2 /*v258*/, v0, v251 /*v507*/
@@ -48,7 +48,7 @@ body: |
; VOP3
- ; GCN-NEXT: s_set_vgpr_msb 0x55
+ ; GCN-NEXT: s_set_vgpr_msb 0x4455
; ASM-SAME: ; msbs: dst=1 src0=1 src1=1 src2=1
; GCN-NEXT: v_fma_f32 v3 /*v259*/, v4 /*v260*/, v5 /*v261*/, v6 /*v262*/
$vgpr259 = V_FMA_F32_e64 0, undef $vgpr260, 0, undef $vgpr261, 0, undef $vgpr262, 0, 0, implicit $exec, implicit $mode
@@ -58,32 +58,32 @@ body: |
$vgpr259 = V_FMA_F32_e64 0, undef $vgpr260, 0, undef $vgpr261, 0, undef $vgpr262, 0, 0, implicit $exec, implicit $mode
; Tuple crossing the 256 boundary
- ; GCN-NEXT: s_set_vgpr_msb 17
+ ; GCN-NEXT: s_set_vgpr_msb 0x5511
; ASM-SAME: ; msbs: dst=0 src0=1 src1=0 src2=1
; GCN-NEXT: v_mqsad_u32_u8 v[254:257], v[2:3] /*v[258:259]*/, v0, v[244:247] /*v[500:503]*/
$vgpr254_vgpr255_vgpr256_vgpr257 = V_MQSAD_U32_U8_e64 $vgpr258_vgpr259, $vgpr0, undef $vgpr500_vgpr501_vgpr502_vgpr503, 0, implicit $exec
; DPP/tied operand
- ; GCN-NEXT: s_set_vgpr_msb 0x45
+ ; GCN-NEXT: s_set_vgpr_msb 0x1145
; ASM-SAME: ; msbs: dst=1 src0=1 src1=1 src2=0
; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GCN-NEXT: v_add_nc_u16_e64_dpp v0 /*v256*/, v1 /*v257*/, v2 /*v258*/ quad_perm:[1,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1
$vgpr256 = V_ADD_NC_U16_fake16_e64_dpp $vgpr256, 0, $vgpr257, 0, undef $vgpr258, 0, 0, 1, 15, 15, 1, implicit $exec
- ; GCN-NEXT: s_set_vgpr_msb 17
+ ; GCN-NEXT: s_set_vgpr_msb 0x4511
; ASM-SAME: ; msbs: dst=0 src0=1 src1=0 src2=1
; GCN-NEXT: v_add3_u32_e64_dpp v0, v1 /*v257*/, v0, v2 /*v258*/ quad_perm:[1,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1
$vgpr0 = V_ADD3_U32_e64_dpp $vgpr0, $vgpr257, $vgpr0, undef $vgpr258, 1, 15, 15, 1, implicit $exec
; DS (addr, data0, and data1 operands)
- ; GCN-NEXT: s_set_vgpr_msb 20
+ ; GCN-NEXT: s_set_vgpr_msb 0x1114
; ASM-SAME: ; msbs: dst=0 src0=0 src1=1 src2=1
; GCN-NEXT: ds_store_2addr_b32 v0, v248 /*v504*/, v249 /*v505*/ offset1:1
DS_WRITE2_B32_gfx9 $vgpr0, undef $vgpr504, undef $vgpr505, 0, 1, 0, implicit $exec
; Reset
- ; GCN-NEXT: s_set_vgpr_msb 0
+ ; GCN-NEXT: s_set_vgpr_msb 0x1400
; ASM-SAME: ; msbs: dst=0 src0=0 src1=0 src2=0
; GCN-NEXT: ds_store_2addr_b32 v0, v248, v249 offset1:1
DS_WRITE2_B32_gfx9 $vgpr0, undef $vgpr248, undef $vgpr249, 0, 1, 0, implicit $exec
@@ -93,13 +93,13 @@ body: |
; GCN-NEXT: ds_load_b32 v0, v255 /*v511*/
$vgpr0 = DS_READ_B32_gfx9 $vgpr511, 0, 0, implicit $exec
- ; GCN-NEXT: s_set_vgpr_msb 0x44
+ ; GCN-NEXT: s_set_vgpr_msb 0x144
; ASM-SAME: ; msbs: dst=1 src0=0 src1=1 src2=0
; GCN-NEXT: ds_add_rtn_u32 v255 /*v511*/, v0, v248 /*v504*/
$vgpr511 = DS_ADD_RTN_U32_gfx9 $vgpr0, undef $vgpr504, 0, 0, implicit $exec
; Reset
- ; GCN-NEXT: s_set_vgpr_msb 0
+ ; GCN-NEXT: s_set_vgpr_msb 0x4400
; ASM-SAME: ; msbs: dst=0 src0=0 src1=0 src2=0
; GCN-NEXT: ds_add_rtn_u32 v0, v0, v0
$vgpr0 = DS_ADD_RTN_U32_gfx9 $vgpr0, $vgpr0, 0, 0, implicit $exec
@@ -111,17 +111,17 @@ body: |
; GCN-NEXT: global_load_b32 v2, v[2:3] /*v[258:259]*/, off
$vgpr2 = GLOBAL_LOAD_DWORD undef $vgpr258_vgpr259, 0, 0, implicit $exec
- ; GCN-NEXT: s_set_vgpr_msb 64
+ ; GCN-NEXT: s_set_vgpr_msb 0x140
; ASM-SAME: ; msbs: dst=1 src0=0 src1=0 src2=0
; GCN-NEXT: global_load_b32 v255 /*v511*/, v0, s[0:1]
$vgpr511 = GLOBAL_LOAD_DWORD_SADDR undef $sgpr0_sgpr1, $vgpr0, 0, 0, implicit $exec
- ; GCN-NEXT: s_set_vgpr_msb 1
+ ; GCN-NEXT: s_set_vgpr_msb 0x4001
; ASM-SAME: ; msbs: dst=0 src0=1 src1=0 src2=0
; GCN-NEXT: scratch_load_u8 v0, v255 /*v511*/, s0
$vgpr0 = SCRATCH_LOAD_UBYTE_SVS $vgpr511, undef $sgpr0, 0, 0, implicit $exec, implicit $flat_scr
- ; GCN-NEXT: s_set_vgpr_msb 0
+ ; GCN-NEXT: s_set_vgpr_msb 0x100
; ASM-SAME: ; msbs: dst=0 src0=0 src1=0 src2=0
; GCN-NEXT: global_store_b32 v[0:1], v2, off
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
@@ -135,13 +135,13 @@ body: |
; GCN-NEXT: global_store_b96 v[0:1] /*v[256:257]*/, v[244:246] /*v[500:502]*/, off
GLOBAL_STORE_DWORDX3 $vgpr256_vgpr257, $vgpr500_vgpr501_vgpr502, 0, 0, implicit $exec
- ; GCN-NEXT: s_set_vgpr_msb 0x44
+ ; GCN-NEXT: s_set_vgpr_msb 0x544
; ASM-SAME: ; msbs: dst=1 src0=0 src1=1 src2=0
; GCN-NEXT: flat_atomic_add_u32 v254 /*v510*/, v[0:1], v255 /*v511*/ th:TH_ATOMIC_RETURN
$vgpr510 = FLAT_ATOMIC_ADD_RTN $vgpr0_vgpr1, $vgpr511, 0, 1, implicit $exec, implicit $flat_scr
; Reset
- ; GCN-NEXT: s_set_vgpr_msb 0
+ ; GCN-NEXT: s_set_vgpr_msb 0x4400
; ASM-SAME: ; msbs: dst=0 src0=0 src1=0 src2=0
; GCN-NEXT: flat_atomic_add_u32 v0, v[0:1], v255 th:TH_ATOMIC_RETURN
$vgpr0 = FLAT_ATOMIC_ADD_RTN $vgpr0_vgpr1, $vgpr255, 0, 1, implicit $exec, implicit $flat_scr
@@ -156,12 +156,12 @@ body: |
; GCN-NEXT: buffer_load_b32 v1 /*v257*/, v0, s[8:11], s3 offen
$vgpr257 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN $vgpr0, undef $sgpr8_sgpr9_sgpr10_sgpr11, undef $sgpr3, 0, 0, 0, implicit $exec
- ; GCN-NEXT: s_set_vgpr_msb 0x41
+ ; GCN-NEXT: s_set_vgpr_msb 0x4041
; ASM-SAME: ; msbs: dst=1 src0=1 src1=0 src2=0
; GCN-NEXT: buffer_load_b32 v1 /*v257*/, v0 /*v256*/, s[8:11], s3 offen
$vgpr257 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN $vgpr256, undef $sgpr8_sgpr9_sgpr10_sgpr11, undef $sgpr3, 0, 0, 0, implicit $exec
- ; GCN-NEXT: s_set_vgpr_msb 0
+ ; GCN-NEXT: s_set_vgpr_msb 0x4100
; ASM-SAME: ; msbs: dst=0 src0=0 src1=0 src2=0
; GCN-NEXT: buffer_store_b32 v0, v1, s[0:3], s3 offen
BUFFER_STORE_DWORD_VBUFFER_OFFEN $vgpr0, $vgpr1, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr3, 0, 0, 0, implicit $exec
@@ -171,7 +171,7 @@ body: |
; GCN-NEXT: buffer_store_b32 v0 /*v256*/, v1 /*v257*/, s[0:3], s3 offen
BUFFER_STORE_DWORD_VBUFFER_OFFEN $vgpr256, $vgpr257, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr3, 0, 0, 0, implicit $exec
- ; GCN-NEXT: s_set_vgpr_msb 0
+ ; GCN-NEXT: s_set_vgpr_msb 0x4100
; ASM-SAME: ; msbs: dst=0 src0=0 src1=0 src2=0
; GCN-NEXT: buffer_atomic_add_f32 v0, v1, s[0:3], s3 offen
BUFFER_ATOMIC_ADD_F32_VBUFFER_OFFEN $vgpr0, $vgpr1, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr3, 0, 0, implicit $exec
@@ -183,44 +183,44 @@ body: |
; VGPRs above 512
- ; GCN-NEXT: s_set_vgpr_msb 0xaa
+ ; GCN-NEXT: s_set_vgpr_msb 0x41aa
; ASM-SAME: ; msbs: dst=2 src0=2 src1=2 src2=2
; GCN-NEXT: v_fma_f32 v0 /*v512*/, v1 /*v513*/, v2 /*v514*/, v3 /*v515*/
$vgpr512 = V_FMA_F32_e64 0, undef $vgpr513, 0, undef $vgpr514, 0, undef $vgpr515, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: s_set_vgpr_msb 0xab
+ ; GCN-NEXT: s_set_vgpr_msb 0xaaab
; ASM-SAME: ; msbs: dst=2 src0=3 src1=2 src2=2
; GCN-NEXT: v_fma_f32 v0 /*v512*/, v0 /*v768*/, v2 /*v514*/, v3 /*v515*/
$vgpr512 = V_FMA_F32_e64 0, undef $vgpr768, 0, undef $vgpr514, 0, undef $vgpr515, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: s_set_vgpr_msb 0xae
+ ; GCN-NEXT: s_set_vgpr_msb 0xabae
; ASM-SAME: ; msbs: dst=2 src0=2 src1=3 src2=2
; GCN-NEXT: v_fma_f32 v0 /*v512*/, v1 /*v513*/, v2 /*v770*/, v3 /*v515*/
$vgpr512 = V_FMA_F32_e64 0, undef $vgpr513, 0, undef $vgpr770, 0, undef $vgpr515, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: s_set_vgpr_msb 0xba
+ ; GCN-NEXT: s_set_vgpr_msb 0xaeba
; ASM-SAME: ; msbs: dst=2 src0=2 src1=2 src2=3
; GCN-NEXT: v_fma_f32 v0 /*v512*/, v1 /*v513*/, v2 /*v514*/, v3 /*v771*/
$vgpr512 = V_FMA_F32_e64 0, undef $vgpr513, 0, undef $vgpr514, 0, undef $vgpr771, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: s_set_vgpr_msb 0xea
+ ; GCN-NEXT: s_set_vgpr_msb 0xbaea
; ASM-SAME: ; msbs: dst=3 src0=2 src1=2 src2=2
; GCN-NEXT: v_fma_f32 v255 /*v1023*/, v1 /*v513*/, v2 /*v514*/, v3 /*v515*/
$vgpr1023 = V_FMA_F32_e64 0, undef $vgpr513, 0, undef $vgpr514, 0, undef $vgpr515, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: s_set_vgpr_msb 0xff
+ ; GCN-NEXT: s_set_vgpr_msb 0xeaff
; ASM-SAME: ; msbs: dst=3 src0=3 src1=3 src2=3
; GCN-NEXT: v_fma_f32 v0 /*v768*/, v1 /*v769*/, v2 /*v770*/, v3 /*v771*/
$vgpr768 = V_FMA_F32_e64 0, undef $vgpr769, 0, undef $vgpr770, 0, undef $vgpr771, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: s_set_vgpr_msb 0x42
+ ; GCN-NEXT: s_set_vgpr_msb 0xff42
; ASM-SAME: ; msbs: dst=1 src0=2 src1=0 src2=0
; GCN-NEXT: v_mov_b32_e32 v0 /*v256*/, v0 /*v512*/
$vgpr256 = V_MOV_B32_e32 undef $vgpr512, implicit $exec
; Reset
- ; GCN-NEXT: s_set_vgpr_msb 0
+ ; GCN-NEXT: s_set_vgpr_msb 0x4200
; ASM-SAME: ; msbs: dst=0 src0=0 src1=0 src2=0
; GCN-NEXT: v_fma_f32 v0, v1, v2, v3
$vgpr0 = V_FMA_F32_e64 0, undef $vgpr1, 0, undef $vgpr2, 0, undef $vgpr3, 0, 0, implicit $exec, implicit $mode
@@ -232,12 +232,12 @@ body: |
; GCN-NEXT: global_store_b96 v[0:1] /*v[512:513]*/, v[0:2] /*v[512:514]*/, off
GLOBAL_STORE_DWORDX3 $vgpr512_vgpr513, $vgpr512_vgpr513_vgpr514, 0, 0, implicit $exec
- ; GCN-NEXT: s_set_vgpr_msb 11
+ ; GCN-NEXT: s_set_vgpr_msb 0xa0b
; ASM-SAME: ; msbs: dst=0 src0=3 src1=2 src2=0
; GCN-NEXT: global_store_b64 v[254:255] /*v[1022:1023]*/, v[254:255] /*v[766:767]*/, off
GLOBAL_STORE_DWORDX2 $vgpr1022_vgpr1023, $vgpr766_vgpr767, 0, 0, implicit $exec
- ; GCN-NEXT: s_set_vgpr_msb 0x55
+ ; GCN-NEXT: s_set_vgpr_msb 0xb55
; ASM-SAME: ; msbs: dst=1 src0=1 src1=1 src2=1
; GCN-NEXT: v_wmma_f32_16x16x32_bf16 v[14:21] /*v[270:277]*/, v[26:33] /*v[282:289]*/, v[34:41] /*v[290:297]*/, v[14:21] /*v[270:277]*/
early-clobber $vgpr270_vgpr271_vgpr272_vgpr273_vgpr274_vgpr275_vgpr276_vgpr277 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr 8, undef $vgpr282_vgpr283_vgpr284_vgpr285_vgpr286_vgpr287_vgpr288_vgpr289, 8, undef $vgpr290_vgpr291_vgpr292_vgpr293_vgpr294_vgpr295_vgpr296_vgpr297, 8, killed undef $vgpr270_vgpr271_vgpr272_vgpr273_vgpr274_vgpr275_vgpr276_vgpr277, 0, 0, 0, 0, implicit $exec
@@ -247,6 +247,7 @@ body: |
...
# ASM-LABEL: {{^}}vopd:
+
# DIS-LABEL: <vopd>:
---
name: vopd
@@ -262,35 +263,35 @@ body: |
; GCN-NEXT: v_dual_sub_f32 v244 /*v500*/, v1, v2 :: v_dual_mul_f32 v0 /*v256*/, v3, v4
$vgpr500, $vgpr256 = V_DUAL_SUB_F32_e32_X_MUL_F32_e32_gfx1250 undef $vgpr1, undef $vgpr2, undef $vgpr3, undef $vgpr4, implicit $mode, implicit $exec
- ; GCN-NEXT: s_set_vgpr_msb 0x41
+ ; GCN-NEXT: s_set_vgpr_msb 0x4041
; GCN-NEXT: v_dual_sub_f32 v244 /*v500*/, s1, v2 :: v_dual_mul_f32 v0 /*v256*/, v44 /*v300*/, v4
$vgpr500, $vgpr256 = V_DUAL_SUB_F32_e32_X_MUL_F32_e32_gfx1250 undef $sgpr1, undef $vgpr2, undef $vgpr300, undef $vgpr4, implicit $mode, implicit $exec
- ; GCN-NEXT: s_set_vgpr_msb 4
+ ; GCN-NEXT: s_set_vgpr_msb 0x4104
; GCN-NEXT: v_dual_sub_f32 v255, v1, v44 /*v300*/ :: v_dual_mul_f32 v6, v0, v1 /*v257*/
$vgpr255, $vgpr6 = V_DUAL_SUB_F32_e32_X_MUL_F32_e32_gfx1250 undef $vgpr1, undef $vgpr300, undef $vgpr0, $vgpr257, implicit $mode, implicit $exec
- ; GCN-NEXT: s_set_vgpr_msb 1
+ ; GCN-NEXT: s_set_vgpr_msb 0x401
; GCN-NEXT: v_dual_sub_f32 v255, 0, v1 :: v_dual_mul_f32 v6, v44 /*v300*/, v3
$vgpr255, $vgpr6 = V_DUAL_SUB_F32_e32_X_MUL_F32_e32_gfx1250 0, undef $vgpr1, undef $vgpr300, undef $vgpr3, implicit $mode, implicit $exec
- ; GCN-NEXT: s_set_vgpr_msb 64
+ ; GCN-NEXT: s_set_vgpr_msb 0x140
; GCN-NEXT: v_dual_fmamk_f32 v243 /*v499*/, v0, 0xa, v3 :: v_dual_fmac_f32 v0 /*v256*/, v1, v1
$vgpr499, $vgpr256 = V_DUAL_FMAMK_F32_X_FMAC_F32_e32_gfx1250 undef $vgpr0, 10, undef $vgpr3, undef $vgpr1, undef $vgpr1, $vgpr256, implicit $mode, implicit $exec
- ; GCN-NEXT: s_set_vgpr_msb 5
+ ; GCN-NEXT: s_set_vgpr_msb 0x4005
; GCN-NEXT: v_dual_mov_b32 v2, v3 /*v259*/ :: v_dual_add_f32 v3, v1 /*v257*/, v2 /*v258*/
$vgpr2, $vgpr3 = V_DUAL_MOV_B32_e32_X_ADD_F32_e32_gfx1250 undef $vgpr259, undef $vgpr257, undef $vgpr258, implicit $exec, implicit $mode
- ; GCN-NEXT: s_set_vgpr_msb 0x44
+ ; GCN-NEXT: s_set_vgpr_msb 0x544
; GCN-NEXT: v_dual_fmamk_f32 v244 /*v500*/, v0, 0xa, v44 /*v300*/ :: v_dual_fmac_f32 v3 /*v259*/, v1, v1 /*v257*/
$vgpr500, $vgpr259 = V_DUAL_FMAMK_F32_X_FMAC_F32_e32_gfx1250 undef $vgpr0, 10, undef $vgpr300, undef $vgpr1, undef $vgpr257, $vgpr259, implicit $mode, implicit $exec
- ; GCN-NEXT: s_set_vgpr_msb 16
+ ; GCN-NEXT: s_set_vgpr_msb 0x4410
; GCN-NEXT: v_dual_fma_f32 v0, v6, v6, v44 /*v300*/ :: v_dual_fma_f32 v1, v4, v5, v45 /*v301*/
$vgpr0, $vgpr1 = V_DUAL_FMA_F32_e64_X_FMA_F32_e64_e96_gfx1250 0, undef $vgpr6, 0, undef $vgpr6, 0, undef $vgpr300, 0, undef $vgpr4, 0, undef $vgpr5, 0, undef $vgpr301, implicit $mode, implicit $exec
- ; GCN-NEXT: s_set_vgpr_msb 0
+ ; GCN-NEXT: s_set_vgpr_msb 0x1000
; GCN-NEXT: v_dual_fmac_f32 v2, v6, v6 :: v_dual_fma_f32 v3, v4, v5, v3
$vgpr2, $vgpr3 = V_DUAL_FMAC_F32_e32_X_FMA_F32_e64_e96_gfx1250 0, undef $vgpr6, 0, undef $vgpr6, undef $vgpr2, 0, undef $vgpr4, 0, undef $vgpr5, 0, $vgpr3, implicit $mode, implicit $exec
@@ -298,7 +299,7 @@ body: |
; GCN-NEXT: v_dual_fma_f32 ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/165035
More information about the llvm-branch-commits mailing list