[llvm] 392f0c9 - [NFC][AMDGPU] Add a test to show the impact of wrong `s_mov_b64` instruction size (#180386)

via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 9 05:56:33 PST 2026


Author: Shilei Tian
Date: 2026-02-09T08:56:28-05:00
New Revision: 392f0c976728fdc3c1db1c4e93a8c26d66c5488d

URL: https://github.com/llvm/llvm-project/commit/392f0c976728fdc3c1db1c4e93a8c26d66c5488d
DIFF: https://github.com/llvm/llvm-project/commit/392f0c976728fdc3c1db1c4e93a8c26d66c5488d.diff

LOG: [NFC][AMDGPU] Add a test to show the impact of wrong `s_mov_b64` instruction size (#180386)

Added: 
    llvm/test/CodeGen/AMDGPU/branch-relaxation-inst-size-gfx1250.mir

Modified: 
    

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/AMDGPU/branch-relaxation-inst-size-gfx1250.mir b/llvm/test/CodeGen/AMDGPU/branch-relaxation-inst-size-gfx1250.mir
new file mode 100644
index 0000000000000..59c19f354ce54
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/branch-relaxation-inst-size-gfx1250.mir
@@ -0,0 +1,66 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -amdgpu-s-branch-bits=4 -run-pass branch-relaxation %s -o - | FileCheck %s
+
+# Test that getInstSizeInBytes correctly estimates the size of S_MOV_B64 with
+# 64-bit literal values on targets with 64-bit literal support (gfx1250).
+#
+# Non-inline-constant values outside [0, 2^31-1] require 64-bit literal
+# encoding, making the instruction 12 bytes (4-byte opcode + 8-byte literal)
+# instead of 8 bytes (4-byte opcode + 4-byte literal).
+#
+# With -amdgpu-s-branch-bits=4, forward branches can reach at most +7 dwords.
+# Three S_MOV_B64 with 64-bit literals = 3 * 12 = 36 bytes = 9 dwords,
+# which exceeds the 7-dword limit, so the branch must be relaxed.
+#
+# Without the correct size estimation (8 bytes instead of 12), the total
+# would be 3 * 8 = 24 bytes = 6 dwords, fitting within the limit, and
+# relaxation would not occur. In a rare real-world scenario, this could lead to
+# an assembler error where the branch offset exceeds the simm16 range.
+
+# Expected once the size is estimated correctly: S_CBRANCH_SCC0 is inverted to
+# S_CBRANCH_SCC1 (skipping the long branch) and a new block (bb.3) with
+# S_ADD_PC_I64 is inserted. The CHECK lines below show no relaxation yet.
+
+---
+name: s_mov_b64_64bit_literal_size
+tracksRegLiveness: true
+machineFunctionInfo:
+  scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+  stackPtrOffsetReg: '$sgpr32'
+body: |
+  ; CHECK-LABEL: name: s_mov_b64_64bit_literal_size
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT:   liveins: $sgpr8
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_CMP_EQ_U32 $sgpr8, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_CBRANCH_SCC0 %bb.2, implicit $scc
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = S_MOV_B64 4294967295
+  ; CHECK-NEXT:   $sgpr12_sgpr13 = S_MOV_B64 2147483648
+  ; CHECK-NEXT:   $sgpr14_sgpr15 = S_MOV_B64 -17
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   S_ENDPGM 0
+  bb.0:
+    liveins: $sgpr8
+    S_CMP_EQ_U32 $sgpr8, 0, implicit-def $scc
+    S_CBRANCH_SCC0 %bb.2, implicit $scc
+
+  bb.1:
+    ; S_MOV_B64 with values requiring 64-bit literal encoding (12 bytes each).
+    ; These values are outside the [0, 2^31-1] range where a 32-bit literal
+    ; can be used, so they need 64-bit literal encoding on gfx1250.
+    ; 0xFFFFFFFF (4294967295) is in [2^31, 2^32-1].
+    ; 0x80000000 (2147483648) is exactly 2^31.
+    ; -17 (0xFFFFFFFFFFFFFFEF) is a negative non-inline constant.
+    $sgpr10_sgpr11 = S_MOV_B64 4294967295
+    $sgpr12_sgpr13 = S_MOV_B64 2147483648
+    $sgpr14_sgpr15 = S_MOV_B64 -17
+
+  bb.2:
+    S_ENDPGM 0
+...


        


More information about the llvm-commits mailing list