[llvm] 4cd1f9a - AMDGPU: Add baseline test for frame index folding (#110737)

via llvm-commits llvm-commits at lists.llvm.org
Wed Oct 2 08:47:36 PDT 2024


Author: Matt Arsenault
Date: 2024-10-02T19:47:32+04:00
New Revision: 4cd1f9ac9fce8042bd9c339a92d1f4936ca0ef14

URL: https://github.com/llvm/llvm-project/commit/4cd1f9ac9fce8042bd9c339a92d1f4936ca0ef14
DIFF: https://github.com/llvm/llvm-project/commit/4cd1f9ac9fce8042bd9c339a92d1f4936ca0ef14.diff

LOG: AMDGPU: Add baseline test for frame index folding (#110737)

We can currently increase the instruction count
when a frame index requires materialization.
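
For context, a minimal sketch of the pattern these baselines capture (lifted from the tests below; not a statement of what the pass should emit): si-fold-operands currently keeps the materializing move in place, e.g.

    %0:sreg_32 = S_MOV_B32 %stack.0
    %1:sreg_32 = S_ADD_I32 %0, 128, implicit-def $scc

whereas folding the frame index directly into the add, as some of the cases below already manage, would leave the S_MOV_B32 dead:

    %1:sreg_32 = S_ADD_I32 %stack.0, 128, implicit-def $scc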

Added: 
    llvm/test/CodeGen/AMDGPU/fold-operands-frame-index.gfx10.mir
    llvm/test/CodeGen/AMDGPU/fold-operands-frame-index.mir

Modified: 
    

Removed: 
    


################################################################################
diff --git a/llvm/test/CodeGen/AMDGPU/fold-operands-frame-index.gfx10.mir b/llvm/test/CodeGen/AMDGPU/fold-operands-frame-index.gfx10.mir
new file mode 100644
index 00000000000000..0d6511cbfceb21
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/fold-operands-frame-index.gfx10.mir
@@ -0,0 +1,131 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs -run-pass=si-fold-operands %s -o - | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass=si-fold-operands %s -o - | FileCheck %s
+
+---
+name:  fold_frame_index__v_add_u32_e32__const_v_fi
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment:    4
+  localFrameSize:  16384
+stack:
+  - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
+body:             |
+  bb.0:
+    ; CHECK-LABEL: name: fold_frame_index__v_add_u32_e32__const_v_fi
+    ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+    ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 128, [[V_MOV_B32_e32_]], implicit $exec
+    ; CHECK-NEXT: $vgpr0 = COPY [[V_ADD_U32_e32_]]
+    ; CHECK-NEXT: SI_RETURN implicit $vgpr0
+    %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+    %1:vgpr_32 = V_ADD_U32_e32 128, %0, implicit $exec
+    $vgpr0 = COPY %1
+    SI_RETURN implicit $vgpr0
+...
+
+---
+name:  fold_frame_index__v_add_co_u32_e64__v_fi_const
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment:    4
+  localFrameSize:  16384
+stack:
+  - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
+body:             |
+  bb.0:
+    ; CHECK-LABEL: name: fold_frame_index__v_add_co_u32_e64__v_fi_const
+    ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+    ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32 = V_ADD_CO_U32_e64 [[V_MOV_B32_e32_]], 128, 0, implicit $exec
+    ; CHECK-NEXT: $vgpr0 = COPY [[V_ADD_CO_U32_e64_]]
+    ; CHECK-NEXT: SI_RETURN implicit $vgpr0
+    %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+    %1:vgpr_32, %2:sreg_32 = V_ADD_CO_U32_e64 %0, 128, 0, implicit $exec
+    $vgpr0 = COPY %1
+    SI_RETURN implicit $vgpr0
+...
+
+
+
+---
+name:  fold_frame_index__v_add_u32_e64__const_v_fi
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment:    4
+  localFrameSize:  16384
+stack:
+  - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
+body:             |
+  bb.0:
+    ; CHECK-LABEL: name: fold_frame_index__v_add_u32_e64__const_v_fi
+    ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+    ; CHECK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 128, [[V_MOV_B32_e32_]], 0, implicit $exec
+    ; CHECK-NEXT: $sgpr4 = COPY [[V_ADD_U32_e64_]]
+    ; CHECK-NEXT: SI_RETURN implicit $sgpr4
+    %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+    %1:vgpr_32 = V_ADD_U32_e64 128, %0, 0, implicit $exec
+    $sgpr4 = COPY %1
+    SI_RETURN implicit $sgpr4
+...
+
+---
+name:  fold_frame_index__v_add_u32_e64___v_fi_const
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment:    4
+  localFrameSize:  16384
+stack:
+  - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
+body:             |
+  bb.0:
+    ; CHECK-LABEL: name: fold_frame_index__v_add_u32_e64___v_fi_const
+    ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+    ; CHECK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_MOV_B32_e32_]], 128, 0, implicit $exec
+    ; CHECK-NEXT: $sgpr4 = COPY [[V_ADD_U32_e64_]]
+    ; CHECK-NEXT: SI_RETURN implicit $sgpr4
+    %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+    %1:vgpr_32 = V_ADD_U32_e64 %0, 128, 0, implicit $exec
+    $sgpr4 = COPY %1
+    SI_RETURN implicit $sgpr4
+...
+
+---
+name:  fold_frame_index__v_add_co_u32_e64___fi_const_v
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment:    4
+  localFrameSize:  16384
+stack:
+  - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
+body:             |
+  bb.0:
+    ; CHECK-LABEL: name: fold_frame_index__v_add_co_u32_e64___fi_const_v
+    ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+    ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32 = V_ADD_CO_U32_e64 128, [[V_MOV_B32_e32_]], 0, implicit $exec
+    ; CHECK-NEXT: $vgpr0 = COPY [[V_ADD_CO_U32_e64_]]
+    ; CHECK-NEXT: SI_RETURN implicit $vgpr0
+    %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+    %1:vgpr_32, %2:sreg_32 = V_ADD_CO_U32_e64 128, %0, 0, implicit $exec
+    $vgpr0 = COPY %1
+    SI_RETURN implicit $vgpr0
+...
+
+---
+name:  fold_frame_index__v_add_co_u32_e64__v_fi_imm
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment:    4
+  localFrameSize:  16384
+stack:
+  - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
+body:             |
+  bb.0:
+    ; CHECK-LABEL: name: fold_frame_index__v_add_co_u32_e64__v_fi_imm
+    ; CHECK: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32 = V_ADD_CO_U32_e64 %stack.0, 64, 0, implicit $exec
+    ; CHECK-NEXT: $vgpr0 = COPY [[V_ADD_CO_U32_e64_]]
+    ; CHECK-NEXT: SI_RETURN implicit $vgpr0
+    %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+    %1:vgpr_32, %2:sreg_32 = V_ADD_CO_U32_e64 %0, 64, 0, implicit $exec
+    $vgpr0 = COPY %1
+    SI_RETURN implicit $vgpr0
+...
+

diff --git a/llvm/test/CodeGen/AMDGPU/fold-operands-frame-index.mir b/llvm/test/CodeGen/AMDGPU/fold-operands-frame-index.mir
new file mode 100644
index 00000000000000..aa91a4f9f988fc
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/fold-operands-frame-index.mir
@@ -0,0 +1,446 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass=si-fold-operands %s -o - | FileCheck -check-prefixes=CHECK,GFX9 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -mattr=+wavefrontsize64 -verify-machineinstrs -run-pass=si-fold-operands %s -o - | FileCheck -check-prefixes=CHECK,GFX10 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -verify-machineinstrs -run-pass=si-fold-operands %s -o - | FileCheck -check-prefixes=CHECK,GFX12 %s
+
+---
+name:  fold_frame_index__s_add_i32__fi_const
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment:    4
+  localFrameSize:  16384
+stack:
+  - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
+body:             |
+  bb.0:
+    ; CHECK-LABEL: name: fold_frame_index__s_add_i32__fi_const
+    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 %stack.0
+    ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_MOV_B32_]], 128, implicit-def $scc
+    ; CHECK-NEXT: $sgpr4 = COPY [[S_ADD_I32_]]
+    ; CHECK-NEXT: SI_RETURN implicit $sgpr4
+    %0:sreg_32 = S_MOV_B32 %stack.0
+    %1:sreg_32 = S_ADD_I32 %0, 128, implicit-def $scc
+    $sgpr4 = COPY %1
+    SI_RETURN implicit $sgpr4
+...
+
+---
+name:  fold_frame_index__s_add_i32__const_fi
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment:    4
+  localFrameSize:  16384
+stack:
+  - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
+body:             |
+  bb.0:
+    ; CHECK-LABEL: name: fold_frame_index__s_add_i32__const_fi
+    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 %stack.0
+    ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 128, [[S_MOV_B32_]], implicit-def $scc
+    ; CHECK-NEXT: $sgpr4 = COPY [[S_ADD_I32_]]
+    ; CHECK-NEXT: SI_RETURN implicit $sgpr4
+    %0:sreg_32 = S_MOV_B32 %stack.0
+    %1:sreg_32 = S_ADD_I32 128, %0, implicit-def $scc
+    $sgpr4 = COPY %1
+    SI_RETURN implicit $sgpr4
+...
+
+---
+name:  fold_frame_index__s_add_i32__materializedconst_fi
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment:    4
+  localFrameSize:  16384
+stack:
+  - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
+body:             |
+  bb.0:
+    ; CHECK-LABEL: name: fold_frame_index__s_add_i32__materializedconst_fi
+    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 %stack.0
+    ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 256, [[S_MOV_B32_]], implicit-def $scc
+    ; CHECK-NEXT: $sgpr4 = COPY [[S_ADD_I32_]]
+    ; CHECK-NEXT: SI_RETURN implicit $sgpr4
+    %0:sreg_32 = S_MOV_B32 256
+    %1:sreg_32 = S_MOV_B32 %stack.0
+    %2:sreg_32 = S_ADD_I32 %0, %1, implicit-def $scc
+    $sgpr4 = COPY %2
+    SI_RETURN implicit $sgpr4
+...
+
+---
+name:  fold_frame_index__s_add_i32__fi_materializedconst_0
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment:    4
+  localFrameSize:  16384
+stack:
+  - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
+body:             |
+  bb.0:
+    ; CHECK-LABEL: name: fold_frame_index__s_add_i32__fi_materializedconst_0
+    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 256
+    ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 %stack.0, [[S_MOV_B32_]], implicit-def $scc
+    ; CHECK-NEXT: $sgpr4 = COPY [[S_ADD_I32_]]
+    ; CHECK-NEXT: SI_RETURN implicit $sgpr4
+    %0:sreg_32 = S_MOV_B32 %stack.0
+    %1:sreg_32 = S_MOV_B32 256
+    %2:sreg_32 = S_ADD_I32 %0, %1, implicit-def $scc
+    $sgpr4 = COPY %2
+    SI_RETURN implicit $sgpr4
+...
+
+
+---
+name:  fold_frame_index__s_add_i32__fi_materializedconst_1
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment:    4
+  localFrameSize:  16384
+stack:
+  - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
+body:             |
+  bb.0:
+    ; CHECK-LABEL: name: fold_frame_index__s_add_i32__fi_materializedconst_1
+    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 %stack.0
+    ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 256, [[S_MOV_B32_]], implicit-def $scc
+    ; CHECK-NEXT: $sgpr4 = COPY [[S_ADD_I32_]]
+    ; CHECK-NEXT: SI_RETURN implicit $sgpr4
+    %0:sreg_32 = S_MOV_B32 256
+    %1:sreg_32 = S_MOV_B32 %stack.0
+    %2:sreg_32 = S_ADD_I32 %0, %1, implicit-def $scc
+    $sgpr4 = COPY %2
+    SI_RETURN implicit $sgpr4
+...
+
+---
+name:  fold_frame_index__s_add_i32__reg_fi
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment:    4
+  localFrameSize:  16384
+stack:
+  - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
+body:             |
+  bb.0:
+    liveins: $sgpr4
+    ; CHECK-LABEL: name: fold_frame_index__s_add_i32__reg_fi
+    ; CHECK: liveins: $sgpr4
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr4
+    ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY]], %stack.0, implicit-def $scc
+    ; CHECK-NEXT: $sgpr4 = COPY [[S_ADD_I32_]]
+    ; CHECK-NEXT: SI_RETURN implicit $sgpr4
+    %0:sreg_32 = COPY $sgpr4
+    %1:sreg_32 = S_MOV_B32 %stack.0
+    %2:sreg_32 = S_ADD_I32 %0, %1, implicit-def $scc
+    $sgpr4 = COPY %2
+    SI_RETURN implicit $sgpr4
+...
+
+---
+name:  fold_frame_index__s_add_i32__fi_reg
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment:    4
+  localFrameSize:  16384
+stack:
+  - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
+body:             |
+  bb.0:
+    liveins: $sgpr4
+    ; CHECK-LABEL: name: fold_frame_index__s_add_i32__fi_reg
+    ; CHECK: liveins: $sgpr4
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr4
+    ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 %stack.0, [[COPY]], implicit-def $scc
+    ; CHECK-NEXT: $sgpr4 = COPY [[S_ADD_I32_]]
+    ; CHECK-NEXT: SI_RETURN implicit $sgpr4
+    %0:sreg_32 = COPY $sgpr4
+    %1:sreg_32 = S_MOV_B32 %stack.0
+    %2:sreg_32 = S_ADD_I32 %1, %0, implicit-def $scc
+    $sgpr4 = COPY %2
+    SI_RETURN implicit $sgpr4
+...
+
+---
+name:  fold_frame_index__v_add_u32_e32__const_v_fi
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment:    4
+  localFrameSize:  16384
+stack:
+  - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
+body:             |
+  bb.0:
+    ; CHECK-LABEL: name: fold_frame_index__v_add_u32_e32__const_v_fi
+    ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+    ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 128, [[V_MOV_B32_e32_]], implicit $exec
+    ; CHECK-NEXT: $sgpr4 = COPY [[V_ADD_U32_e32_]]
+    ; CHECK-NEXT: SI_RETURN implicit $sgpr4
+    %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+    %1:vgpr_32 = V_ADD_U32_e32 128, %0, implicit $exec
+    $sgpr4 = COPY %1
+    SI_RETURN implicit $sgpr4
+...
+
+---
+name:  fold_frame_index__v_add_u32_e32__materialized_v_const_v_fi
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment:    4
+  localFrameSize:  16384
+stack:
+  - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
+body:             |
+  bb.0:
+    ; CHECK-LABEL: name: fold_frame_index__v_add_u32_e32__materialized_v_const_v_fi
+    ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 128, implicit $exec
+    ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 %stack.0, [[V_MOV_B32_e32_]], implicit $exec
+    ; CHECK-NEXT: $sgpr4 = COPY [[V_ADD_U32_e32_]]
+    ; CHECK-NEXT: SI_RETURN implicit $sgpr4
+    %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+    %1:vgpr_32 = V_MOV_B32_e32 128, implicit $exec
+    %2:vgpr_32 = V_ADD_U32_e32 %1, %0, implicit $exec
+    $sgpr4 = COPY %2
+    SI_RETURN implicit $sgpr4
+...
+
+---
+name:  fold_frame_index__v_add_u32_e64__imm_v_fi
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment:    4
+  localFrameSize:  16384
+stack:
+  - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
+body:             |
+  bb.0:
+    ; GFX9-LABEL: name: fold_frame_index__v_add_u32_e64__imm_v_fi
+    ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+    ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 64, [[V_MOV_B32_e32_]], 0, implicit $exec
+    ; GFX9-NEXT: $sgpr4 = COPY [[V_ADD_U32_e64_]]
+    ; GFX9-NEXT: SI_RETURN implicit $sgpr4
+    ;
+    ; GFX10-LABEL: name: fold_frame_index__v_add_u32_e64__imm_v_fi
+    ; GFX10: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 64, %stack.0, 0, implicit $exec
+    ; GFX10-NEXT: $sgpr4 = COPY [[V_ADD_U32_e64_]]
+    ; GFX10-NEXT: SI_RETURN implicit $sgpr4
+    ;
+    ; GFX12-LABEL: name: fold_frame_index__v_add_u32_e64__imm_v_fi
+    ; GFX12: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 64, %stack.0, 0, implicit $exec
+    ; GFX12-NEXT: $sgpr4 = COPY [[V_ADD_U32_e64_]]
+    ; GFX12-NEXT: SI_RETURN implicit $sgpr4
+    %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+    %1:vgpr_32 = V_ADD_U32_e64 64, %0, 0, implicit $exec
+    $sgpr4 = COPY %1
+    SI_RETURN implicit $sgpr4
+...
+
+---
+name:  fold_frame_index__v_add_u32_e64___v_fi_imm
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment:    4
+  localFrameSize:  16384
+stack:
+  - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
+body:             |
+  bb.0:
+    ; GFX9-LABEL: name: fold_frame_index__v_add_u32_e64___v_fi_imm
+    ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+    ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_MOV_B32_e32_]], 64, 0, implicit $exec
+    ; GFX9-NEXT: $sgpr4 = COPY [[V_ADD_U32_e64_]]
+    ; GFX9-NEXT: SI_RETURN implicit $sgpr4
+    ;
+    ; GFX10-LABEL: name: fold_frame_index__v_add_u32_e64___v_fi_imm
+    ; GFX10: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %stack.0, 64, 0, implicit $exec
+    ; GFX10-NEXT: $sgpr4 = COPY [[V_ADD_U32_e64_]]
+    ; GFX10-NEXT: SI_RETURN implicit $sgpr4
+    ;
+    ; GFX12-LABEL: name: fold_frame_index__v_add_u32_e64___v_fi_imm
+    ; GFX12: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %stack.0, 64, 0, implicit $exec
+    ; GFX12-NEXT: $sgpr4 = COPY [[V_ADD_U32_e64_]]
+    ; GFX12-NEXT: SI_RETURN implicit $sgpr4
+    %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+    %1:vgpr_32 = V_ADD_U32_e64 %0, 64, 0, implicit $exec
+    $sgpr4 = COPY %1
+    SI_RETURN implicit $sgpr4
+...
+
+---
+name:  fold_frame_index__v_add_co_u32_e32__const_v_fi
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment:    4
+  localFrameSize:  16384
+stack:
+  - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
+body:             |
+  bb.0:
+    ; CHECK-LABEL: name: fold_frame_index__v_add_co_u32_e32__const_v_fi
+    ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+    ; CHECK-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 128, [[V_MOV_B32_e32_]], implicit-def $vcc, implicit $exec
+    ; CHECK-NEXT: $vgpr0 = COPY [[V_ADD_CO_U32_e32_]]
+    ; CHECK-NEXT: SI_RETURN implicit $vgpr0
+    %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+    %1:vgpr_32 = V_ADD_CO_U32_e32 128, %0, implicit-def $vcc, implicit $exec
+    $vgpr0 = COPY %1
+    SI_RETURN implicit $vgpr0
+...
+
+---
+name:  fold_frame_index__v_add_co_u32_e64__v_fi_imm
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment:    4
+  localFrameSize:  16384
+stack:
+  - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
+body:             |
+  bb.0:
+    ; GFX9-LABEL: name: fold_frame_index__v_add_co_u32_e64__v_fi_imm
+    ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+    ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[V_MOV_B32_e32_]], 64, 0, implicit $exec
+    ; GFX9-NEXT: $vgpr0 = COPY [[V_ADD_CO_U32_e64_]]
+    ; GFX9-NEXT: SI_RETURN implicit $vgpr0
+    ;
+    ; GFX10-LABEL: name: fold_frame_index__v_add_co_u32_e64__v_fi_imm
+    ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 %stack.0, 64, 0, implicit $exec
+    ; GFX10-NEXT: $vgpr0 = COPY [[V_ADD_CO_U32_e64_]]
+    ; GFX10-NEXT: SI_RETURN implicit $vgpr0
+    ;
+    ; GFX12-LABEL: name: fold_frame_index__v_add_co_u32_e64__v_fi_imm
+    ; GFX12: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 %stack.0, 64, 0, implicit $exec
+    ; GFX12-NEXT: $vgpr0 = COPY [[V_ADD_CO_U32_e64_]]
+    ; GFX12-NEXT: SI_RETURN implicit $vgpr0
+    %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+    %1:vgpr_32, %2:sreg_64 = V_ADD_CO_U32_e64 %0, 64, 0, implicit $exec
+    $vgpr0 = COPY %1
+    SI_RETURN implicit $vgpr0
+...
+
+---
+name:  fold_frame_index__v_add_co_u32_e64__imm_v_fi
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment:    4
+  localFrameSize:  16384
+stack:
+  - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
+body:             |
+  bb.0:
+    ; GFX9-LABEL: name: fold_frame_index__v_add_co_u32_e64__imm_v_fi
+    ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+    ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 64, [[V_MOV_B32_e32_]], 0, implicit $exec
+    ; GFX9-NEXT: $vgpr0 = COPY [[V_ADD_CO_U32_e64_]]
+    ; GFX9-NEXT: SI_RETURN implicit $vgpr0
+    ;
+    ; GFX10-LABEL: name: fold_frame_index__v_add_co_u32_e64__imm_v_fi
+    ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 64, %stack.0, 0, implicit $exec
+    ; GFX10-NEXT: $vgpr0 = COPY [[V_ADD_CO_U32_e64_]]
+    ; GFX10-NEXT: SI_RETURN implicit $vgpr0
+    ;
+    ; GFX12-LABEL: name: fold_frame_index__v_add_co_u32_e64__imm_v_fi
+    ; GFX12: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 64, %stack.0, 0, implicit $exec
+    ; GFX12-NEXT: $vgpr0 = COPY [[V_ADD_CO_U32_e64_]]
+    ; GFX12-NEXT: SI_RETURN implicit $vgpr0
+    %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+    %1:vgpr_32, %2:sreg_64 = V_ADD_CO_U32_e64 64, %0, 0, implicit $exec
+    $vgpr0 = COPY %1
+    SI_RETURN implicit $vgpr0
+...
+
+---
+name:  multi_use_scalar_fi__add_imm_add_inline_imm
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment:    4
+  localFrameSize:  16384
+stack:
+  - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
+body:             |
+  bb.0:
+    liveins: $sgpr0, $sgpr1
+
+    ; CHECK-LABEL: name: multi_use_scalar_fi__add_imm_add_inline_imm
+    ; CHECK: liveins: $sgpr0, $sgpr1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+    ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 %stack.0
+    ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_MOV_B32_]], 16380, implicit-def dead $scc
+    ; CHECK-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 %stack.0, 56, implicit-def dead $scc
+    ; CHECK-NEXT: $sgpr4 = COPY [[S_ADD_I32_]]
+    ; CHECK-NEXT: $sgpr5 = COPY [[S_ADD_I32_1]]
+    ; CHECK-NEXT: SI_RETURN implicit $sgpr4, implicit $sgpr5
+    %0:sreg_32 = COPY $sgpr0
+    %1:sreg_32 = COPY $sgpr1
+    %2:sreg_32 = S_MOV_B32 16380
+    %3:sreg_32 = S_MOV_B32 56
+    %4:sreg_32 = S_MOV_B32 %stack.0
+    %5:sreg_32 = S_ADD_I32 %4, killed %2, implicit-def dead $scc
+    %6:sreg_32 = S_ADD_I32 %4, killed %3, implicit-def dead $scc
+    $sgpr4 = COPY %5
+    $sgpr5 = COPY %6
+    SI_RETURN implicit $sgpr4, implicit $sgpr5
+...
+
+---
+name:  multi_add_use_vector_fi__add_imm_add_inline_imm
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment:    4
+  localFrameSize:  16384
+stack:
+  - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; GFX9-LABEL: name: multi_add_use_vector_fi__add_imm_add_inline_imm
+    ; GFX9: liveins: $vgpr0, $vgpr1
+    ; GFX9-NEXT: {{  $}}
+    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+    ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_MOV_B32_e32_]], killed [[COPY1]], 0, implicit $exec
+    ; GFX9-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_MOV_B32_e32_]], 56, 0, implicit $exec
+    ; GFX9-NEXT: $vgpr0 = COPY [[V_ADD_U32_e64_]]
+    ; GFX9-NEXT: $vgpr1 = COPY [[V_ADD_U32_e64_1]]
+    ; GFX9-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
+    ;
+    ; GFX10-LABEL: name: multi_add_use_vector_fi__add_imm_add_inline_imm
+    ; GFX10: liveins: $vgpr0, $vgpr1
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX10-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %stack.0, killed [[COPY1]], 0, implicit $exec
+    ; GFX10-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %stack.0, 56, 0, implicit $exec
+    ; GFX10-NEXT: $vgpr0 = COPY [[V_ADD_U32_e64_]]
+    ; GFX10-NEXT: $vgpr1 = COPY [[V_ADD_U32_e64_1]]
+    ; GFX10-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
+    ;
+    ; GFX12-LABEL: name: multi_add_use_vector_fi__add_imm_add_inline_imm
+    ; GFX12: liveins: $vgpr0, $vgpr1
+    ; GFX12-NEXT: {{  $}}
+    ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX12-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %stack.0, killed [[COPY1]], 0, implicit $exec
+    ; GFX12-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %stack.0, 56, 0, implicit $exec
+    ; GFX12-NEXT: $vgpr0 = COPY [[V_ADD_U32_e64_]]
+    ; GFX12-NEXT: $vgpr1 = COPY [[V_ADD_U32_e64_1]]
+    ; GFX12-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
+    %0:vgpr_32 = COPY $vgpr0
+    %1:vgpr_32 = COPY $vgpr1
+    %2:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+    %3:vgpr_32 = V_ADD_U32_e64 %2, killed %1, 0, implicit $exec
+    %4:vgpr_32 = V_MOV_B32_e32 999, implicit $exec
+    %5:vgpr_32 = COPY %3
+    %6:sreg_32 = S_MOV_B32 56
+    %7:vgpr_32 = V_ADD_U32_e64 %2, killed %6, 0, implicit $exec
+    %8:vgpr_32 = COPY %7
+    $vgpr0 = COPY %3
+    $vgpr1 = COPY %7
+    SI_RETURN implicit $vgpr0, implicit $vgpr1
+
+...
