[llvm] 392f0c9 - [NFC][AMDGPU] Add a test to show the impact of wrong `s_mov_b64` instruction size (#180386)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 9 05:56:33 PST 2026
Author: Shilei Tian
Date: 2026-02-09T08:56:28-05:00
New Revision: 392f0c976728fdc3c1db1c4e93a8c26d66c5488d
URL: https://github.com/llvm/llvm-project/commit/392f0c976728fdc3c1db1c4e93a8c26d66c5488d
DIFF: https://github.com/llvm/llvm-project/commit/392f0c976728fdc3c1db1c4e93a8c26d66c5488d.diff
LOG: [NFC][AMDGPU] Add a test to show the impact of wrong `s_mov_b64` instruction size (#180386)
Added:
llvm/test/CodeGen/AMDGPU/branch-relaxation-inst-size-gfx1250.mir
Modified:
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/AMDGPU/branch-relaxation-inst-size-gfx1250.mir b/llvm/test/CodeGen/AMDGPU/branch-relaxation-inst-size-gfx1250.mir
new file mode 100644
index 0000000000000..59c19f354ce54
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/branch-relaxation-inst-size-gfx1250.mir
@@ -0,0 +1,66 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -amdgpu-s-branch-bits=4 -run-pass branch-relaxation %s -o - | FileCheck %s
+
+# Test that getInstSizeInBytes correctly estimates S_MOV_B64 with 64-bit
+# literal values on targets with 64-bit literal support (gfx1250).
+#
+# Values outside [0, 2^31-1] require 64-bit literal encoding, making the
+# instruction 12 bytes (4-byte opcode + 8-byte literal) instead of 8 bytes
+# (4-byte opcode + 4-byte literal).
+#
+# With -amdgpu-s-branch-bits=4, forward branches can reach at most +7 dwords.
+# Three S_MOV_B64 with 64-bit literals = 3 * 12 = 36 bytes = 9 dwords,
+# which exceeds the 7-dword limit, so the branch must be relaxed.
+#
+# Without the correct size estimation (8 bytes instead of 12), the total
+# would be 3 * 8 = 24 bytes = 6 dwords, fitting within the limit, and
+# relaxation would not occur. In a rare real-world scenario, this could lead to
+# an assembler error where the branch offset does not fit in simm16.
+
+# Expected once the size is right: S_CBRANCH_SCC0 is inverted to S_CBRANCH_SCC1
+# (skipping the long branch) and a new block (bb.3) with S_ADD_PC_I64 is added.
+# The CHECK lines below record the current output, where no relaxation occurs.
+
+---
+name: s_mov_b64_64bit_literal_size
+tracksRegLiveness: true
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ ; CHECK-LABEL: name: s_mov_b64_64bit_literal_size
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: liveins: $sgpr8
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CMP_EQ_U32 $sgpr8, 0, implicit-def $scc
+ ; CHECK-NEXT: S_CBRANCH_SCC0 %bb.2, implicit $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $sgpr10_sgpr11 = S_MOV_B64 4294967295
+ ; CHECK-NEXT: $sgpr12_sgpr13 = S_MOV_B64 2147483648
+ ; CHECK-NEXT: $sgpr14_sgpr15 = S_MOV_B64 -17
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $sgpr8
+ S_CMP_EQ_U32 $sgpr8, 0, implicit-def $scc
+ S_CBRANCH_SCC0 %bb.2, implicit $scc
+
+ bb.1:
+ ; S_MOV_B64 with values requiring 64-bit literal encoding (12 bytes each).
+ ; These values are outside the [0, 2^31-1] range where a 32-bit literal
+ ; can be used, so they need 64-bit literal encoding on gfx1250.
+ ; 0xFFFFFFFF (4294967295) is in [2^31, 2^32-1].
+ ; 0x80000000 (2147483648) is exactly 2^31.
+ ; -17 (0xFFFFFFFFFFFFFFEF) is a negative non-inline constant.
+ $sgpr10_sgpr11 = S_MOV_B64 4294967295
+ $sgpr12_sgpr13 = S_MOV_B64 2147483648
+ $sgpr14_sgpr15 = S_MOV_B64 -17
+
+ bb.2:
+ S_ENDPGM 0
+...
More information about the llvm-commits
mailing list