[llvm] 0247a75 - AMDGPU: Add some tests for folding immediates into subregister uses (#129663)

via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 4 08:12:33 PST 2025


Author: Matt Arsenault
Date: 2025-03-04T23:12:29+07:00
New Revision: 0247a75072874d1238db89a88bed28ceea4c8625

URL: https://github.com/llvm/llvm-project/commit/0247a75072874d1238db89a88bed28ceea4c8625
DIFF: https://github.com/llvm/llvm-project/commit/0247a75072874d1238db89a88bed28ceea4c8625.diff

LOG: AMDGPU: Add some tests for folding immediates into subregister uses (#129663)

Added: 
    llvm/test/CodeGen/AMDGPU/si-fold-operands-subreg-imm.mir

Modified: 
    llvm/test/CodeGen/AMDGPU/fold-operands-frame-index.mir

Removed: 
    


################################################################################
diff --git a/llvm/test/CodeGen/AMDGPU/fold-operands-frame-index.mir b/llvm/test/CodeGen/AMDGPU/fold-operands-frame-index.mir
index 6ab1395a0dcca..413408b417c5a 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-operands-frame-index.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-operands-frame-index.mir
@@ -393,3 +393,37 @@ body:             |
     SI_RETURN implicit $vgpr0, implicit $vgpr1
 
 ...
+
+---
+name:  fold_frame_index__through_reg_sequence_to_user_subreg
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment:    4
+  localFrameSize:  16384
+stack:
+  - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
+body:             |
+  bb.0:
+    liveins: $sgpr8
+    ; CHECK-LABEL: name: fold_frame_index__through_reg_sequence_to_user_subreg
+    ; CHECK: liveins: $sgpr8
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr8
+    ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 123
+    ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 %stack.0
+    ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+    ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY]], %stack.0, implicit-def $scc
+    ; CHECK-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY]], 123, implicit-def $scc
+    ; CHECK-NEXT: $sgpr4 = COPY [[S_ADD_I32_]]
+    ; CHECK-NEXT: $sgpr5 = COPY [[S_ADD_I32_1]]
+    ; CHECK-NEXT: SI_RETURN implicit $sgpr4, implicit $sgpr5
+    %0:sreg_32 = COPY $sgpr8
+    %1:sreg_32 = S_MOV_B32 123
+    %2:sreg_32 = S_MOV_B32 %stack.0
+    %3:sreg_64 = REG_SEQUENCE %1, %subreg.sub0, %2, %subreg.sub1
+    %4:sreg_32 = S_ADD_I32 %0, %3.sub1, implicit-def $scc
+    %5:sreg_32 = S_ADD_I32 %0, %3.sub0, implicit-def $scc
+    $sgpr4 = COPY %4
+    $sgpr5 = COPY %5
+    SI_RETURN implicit $sgpr4, implicit $sgpr5
+...

diff --git a/llvm/test/CodeGen/AMDGPU/si-fold-operands-subreg-imm.mir b/llvm/test/CodeGen/AMDGPU/si-fold-operands-subreg-imm.mir
new file mode 100644
index 0000000000000..591bda2b22f12
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/si-fold-operands-subreg-imm.mir
@@ -0,0 +1,129 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -run-pass=si-fold-operands -o - %s | FileCheck %s
+
+# Make sure materializes of 64-bit immediates fold the correct value
+# into subregister uses.
+
+---
+name:            s_mov_b64_sub1_folds_wrong_value_0
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $sgpr8_sgpr9
+
+    ; CHECK-LABEL: name: s_mov_b64_sub1_folds_wrong_value_0
+    ; CHECK: liveins: $sgpr8_sgpr9
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+    ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]].sub0, %subreg.sub0, [[COPY]].sub1, %subreg.sub1
+    ; CHECK-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[REG_SEQUENCE]].sub0, 8, implicit-def $scc
+    ; CHECK-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[REG_SEQUENCE]].sub1, 8, implicit-def $scc, implicit $scc
+    ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE1]]
+    %0:sgpr_64 = COPY $sgpr8_sgpr9
+    %1:sreg_64 = REG_SEQUENCE %0.sub0, %subreg.sub0, %0.sub1, %subreg.sub1
+    %2:sreg_64 = S_MOV_B64 8
+    %3:sreg_32 = S_ADD_U32 %1.sub0, %2.sub0, implicit-def $scc
+    %4:sreg_32 = S_ADDC_U32 %1.sub1, %2.sub1, implicit-def $scc, implicit $scc
+    %5:sreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1
+    S_ENDPGM 0, implicit %5
+
+...
+
+---
+name:            v_mov_b64_pseudo_sub1_folds_wrong_value
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr8_vgpr9
+
+    ; CHECK-LABEL: name: v_mov_b64_pseudo_sub1_folds_wrong_value
+    ; CHECK: liveins: $vgpr8_vgpr9
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr8_vgpr9
+    ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]].sub0, %subreg.sub0, [[COPY]].sub1, %subreg.sub1
+    ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[REG_SEQUENCE]].sub0, 30064771075, 0, implicit $exec
+    ; CHECK-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[REG_SEQUENCE]].sub1, 30064771075, [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+    ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE1]]
+    %0:vreg_64 = COPY $vgpr8_vgpr9
+    %1:vreg_64 = REG_SEQUENCE %0.sub0, %subreg.sub0, %0.sub1, %subreg.sub1
+    %2:vreg_64 = V_MOV_B64_PSEUDO 30064771075, implicit $exec
+    %3:vgpr_32, %6:sreg_64_xexec = V_ADD_CO_U32_e64 %1.sub0, %2.sub0, 0, implicit $exec
+    %4:vgpr_32, %7:sreg_64_xexec = V_ADDC_U32_e64 %1.sub1, %2.sub1, %6, 0, implicit $exec
+    %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1
+    S_ENDPGM 0, implicit %5
+
+...
+
+---
+name:            subreg_fold_imm
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $sgpr8_sgpr9, $vgpr8_vgpr9
+
+    ; CHECK-LABEL: name: subreg_fold_imm
+    ; CHECK: liveins: $sgpr8_sgpr9, $vgpr8_vgpr9
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+    ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]].sub0, %subreg.sub0, [[COPY]].sub1, %subreg.sub1
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY [[COPY]]
+    ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[REG_SEQUENCE]].sub1, [[COPY1]].sub1, 0, implicit $exec
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]]
+    %0:sgpr_64 = COPY $sgpr8_sgpr9
+    %1:sreg_64 = REG_SEQUENCE %0.sub0, %subreg.sub0, %0.sub1, %subreg.sub1
+    %2:vreg_64 = COPY %0
+    %3:vgpr_32, dead %6:sreg_64_xexec = V_ADD_CO_U32_e64 %1.sub1, %2.sub1, 0, implicit $exec
+    S_ENDPGM 0, implicit %3
+
+...
+
+---
+name:            s_mov_b64_into_reg_sequence_user
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $sgpr8_sgpr9
+
+    ; CHECK-LABEL: name: s_mov_b64_into_reg_sequence_user
+    ; CHECK: liveins: $sgpr8_sgpr9
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+    ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 8
+    ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B64_]].sub1, %subreg.sub0, [[S_MOV_B64_]].sub0, %subreg.sub1
+    ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], [[REG_SEQUENCE]], implicit-def $scc
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]]
+    %0:sgpr_64 = COPY $sgpr8_sgpr9
+    %1:sreg_64 = S_MOV_B64 8
+    %2:sreg_64 = REG_SEQUENCE %1.sub1, %subreg.sub0, %1.sub0, %subreg.sub1
+    %3:sreg_64 = S_AND_B64 %0, %2, implicit-def $scc
+    S_ENDPGM 0, implicit %3
+
+...
+
+---
+name:            s_mov_b64_into_reg_sequence_user_with_subregs
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $sgpr8_sgpr9
+
+    ; CHECK-LABEL: name: s_mov_b64_into_reg_sequence_user_with_subregs
+    ; CHECK: liveins: $sgpr8_sgpr9
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+    ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 8
+    ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B64_]].sub1, %subreg.sub0, [[S_MOV_B64_]].sub0, %subreg.sub1
+    ; CHECK-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]].sub0, 8, implicit-def $scc
+    ; CHECK-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY]].sub1, 8, implicit-def $scc, implicit $scc
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_ADD_U32_]], implicit [[S_ADDC_U32_]]
+    %0:sgpr_64 = COPY $sgpr8_sgpr9
+    %1:sreg_64 = S_MOV_B64 8
+    %2:sreg_64 = REG_SEQUENCE %1.sub1, %subreg.sub0, %1.sub0, %subreg.sub1
+    %3:sreg_32 = S_ADD_U32 %0.sub0, %2.sub0, implicit-def $scc
+    %4:sreg_32 = S_ADDC_U32 %0.sub1, %2.sub1, implicit-def $scc, implicit $scc
+    S_ENDPGM 0, implicit %3, implicit %4
+
+...
+


        


More information about the llvm-commits mailing list