[llvm] [AMDGPU] Auto-generate lit pattern for test CodeGen/AMDGPU/merge-sbuffer-load.mir. (PR #101618)

Christudasan Devadasan via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 5 22:45:47 PDT 2024


https://github.com/cdevadas updated https://github.com/llvm/llvm-project/pull/101618

From ecfedc9b5126ce18f2b8b2bbd69844759a859143 Mon Sep 17 00:00:00 2001
From: Christudasan Devadasan <Christudasan.Devadasan at amd.com>
Date: Tue, 30 Jul 2024 16:45:21 +0530
Subject: [PATCH] [AMDGPU] Auto-generate lit pattern for test
 CodeGen/AMDGPU/merge-sbuffer-load.mir.

---
 .../CodeGen/AMDGPU/merge-sbuffer-load.mir     | 265 +++++++++++++++---
 1 file changed, 231 insertions(+), 34 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/merge-sbuffer-load.mir b/llvm/test/CodeGen/AMDGPU/merge-sbuffer-load.mir
index 1b2f672fd57bb..f8502091f8b78 100644
--- a/llvm/test/CodeGen/AMDGPU/merge-sbuffer-load.mir
+++ b/llvm/test/CodeGen/AMDGPU/merge-sbuffer-load.mir
@@ -1,14 +1,22 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
 # RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck %s -check-prefixes=CHECK,GFX10
 # RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck %s -check-prefixes=CHECK,GFX12
 
-# CHECK-LABEL: name: merge_s_buffer_load_x2
-# CHECK: S_BUFFER_LOAD_DWORDX2_IMM %0, 0, 0 :: (dereferenceable invariant load (s64), align 4)
+---
 name: merge_s_buffer_load_x2
 tracksRegLiveness: true
 body:             |
   bb.0:
     liveins: $sgpr0_sgpr1_sgpr2_sgpr3
 
+    ; CHECK-LABEL: name: merge_s_buffer_load_x2
+    ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+    ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s64), align 4)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[S_BUFFER_LOAD_DWORDX2_IMM]].sub0
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[S_BUFFER_LOAD_DWORDX2_IMM]].sub1
+    ; CHECK-NEXT: S_ENDPGM 0
     %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
     %1:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s32))
     %2:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 4, 0 :: (dereferenceable invariant load (s32))
@@ -17,15 +25,19 @@ body:             |
 ...
 ---
 
-# CHECK-LABEL: name: merge_s_buffer_load_x1_x2
-# CHECK: S_BUFFER_LOAD_DWORD_IMM %0, 0, 0 :: (dereferenceable invariant load (s32))
-# CHECK: S_BUFFER_LOAD_DWORDX2_IMM %0, 4, 0 :: (dereferenceable invariant load (s64))
 name: merge_s_buffer_load_x1_x2
 tracksRegLiveness: true
 body:             |
   bb.0:
     liveins: $sgpr0_sgpr1_sgpr2_sgpr3
 
+    ; CHECK-LABEL: name: merge_s_buffer_load_x1_x2
+    ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+    ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s32))
+    ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORDX2_IMM:%[0-9]+]]:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM [[COPY]], 4, 0 :: (dereferenceable invariant load (s64))
+    ; CHECK-NEXT: S_ENDPGM 0
     %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
     %1:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s32))
     %2:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM %0:sgpr_128, 4, 0 :: (dereferenceable invariant load (s64))
@@ -34,16 +46,28 @@ body:             |
 ...
 ---
 
-# CHECK-LABEL: name: merge_s_buffer_load_x2_x1
-# GFX10: S_BUFFER_LOAD_DWORDX2_IMM %0, 0, 0 :: (dereferenceable invariant load (s64))
-# GFX10: S_BUFFER_LOAD_DWORD_IMM %0, 8, 0 :: (dereferenceable invariant load (s32))
-# GFX12: S_BUFFER_LOAD_DWORDX3_IMM %0, 0, 0 :: (dereferenceable invariant load (s96), align 8)
 name: merge_s_buffer_load_x2_x1
 tracksRegLiveness: true
 body:             |
   bb.0:
     liveins: $sgpr0_sgpr1_sgpr2_sgpr3
 
+    ; GFX10-LABEL: name: merge_s_buffer_load_x2_x1
+    ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+    ; GFX10-NEXT: [[S_BUFFER_LOAD_DWORDX2_IMM:%[0-9]+]]:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s64))
+    ; GFX10-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY]], 8, 0 :: (dereferenceable invariant load (s32))
+    ; GFX10-NEXT: S_ENDPGM 0
+    ;
+    ; GFX12-LABEL: name: merge_s_buffer_load_x2_x1
+    ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+    ; GFX12-NEXT: {{  $}}
+    ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+    ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX3_IMM:%[0-9]+]]:sgpr_96 = S_BUFFER_LOAD_DWORDX3_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s96), align 8)
+    ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY [[S_BUFFER_LOAD_DWORDX3_IMM]].sub0_sub1
+    ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[S_BUFFER_LOAD_DWORDX3_IMM]].sub2
+    ; GFX12-NEXT: S_ENDPGM 0
     %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
     %1:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s64))
     %2:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 8, 0 :: (dereferenceable invariant load (s32))
@@ -52,14 +76,37 @@ body:             |
 ...
 ---
 
-# CHECK-LABEL: name: merge_s_buffer_load_x4
-# CHECK: S_BUFFER_LOAD_DWORDX4_IMM %0, 0, 0 :: (dereferenceable invariant load (s128), align 4)
 name: merge_s_buffer_load_x4
 tracksRegLiveness: true
 body:             |
   bb.0:
     liveins: $sgpr0_sgpr1_sgpr2_sgpr3
 
+    ; GFX10-LABEL: name: merge_s_buffer_load_x4
+    ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+    ; GFX10-NEXT: [[S_BUFFER_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s128), align 4)
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY [[S_BUFFER_LOAD_DWORDX4_IMM]].sub0_sub1
+    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_64_xexec = COPY killed [[S_BUFFER_LOAD_DWORDX4_IMM]].sub2_sub3
+    ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY1]].sub0
+    ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY1]].sub1
+    ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY2]].sub0
+    ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY2]].sub1
+    ; GFX10-NEXT: S_ENDPGM 0
+    ;
+    ; GFX12-LABEL: name: merge_s_buffer_load_x4
+    ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+    ; GFX12-NEXT: {{  $}}
+    ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+    ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s128), align 4)
+    ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr_96 = COPY [[S_BUFFER_LOAD_DWORDX4_IMM]].sub0_sub1_sub2
+    ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[S_BUFFER_LOAD_DWORDX4_IMM]].sub3
+    ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[COPY1]].sub0_sub1
+    ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY1]].sub2
+    ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY3]].sub0
+    ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY3]].sub1
+    ; GFX12-NEXT: S_ENDPGM 0
     %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
     %1:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s32))
     %2:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 4, 0 :: (dereferenceable invariant load (s32))
@@ -70,15 +117,19 @@ body:             |
 ...
 ---
 
-# CHECK-LABEL: name: merge_s_buffer_load_x1_x3
-# CHECK: S_BUFFER_LOAD_DWORD_IMM %0, 0, 0 :: (dereferenceable invariant load (s32))
-# CHECK: S_BUFFER_LOAD_DWORDX3_IMM %0, 4, 0 :: (dereferenceable invariant load (s96), align 16)
 name: merge_s_buffer_load_x1_x3
 tracksRegLiveness: true
 body:             |
   bb.0:
     liveins: $sgpr0_sgpr1_sgpr2_sgpr3
 
+    ; CHECK-LABEL: name: merge_s_buffer_load_x1_x3
+    ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+    ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s32))
+    ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORDX3_IMM:%[0-9]+]]:sgpr_96 = S_BUFFER_LOAD_DWORDX3_IMM [[COPY]], 4, 0 :: (dereferenceable invariant load (s96), align 16)
+    ; CHECK-NEXT: S_ENDPGM 0
     %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
     %1:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s32))
     %2:sgpr_96 = S_BUFFER_LOAD_DWORDX3_IMM %0:sgpr_128, 4, 0 :: (dereferenceable invariant load (s96))
@@ -87,14 +138,20 @@ body:             |
 ...
 ---
 
-# CHECK-LABEL: name: merge_s_buffer_load_x3_x1
-# CHECK: S_BUFFER_LOAD_DWORDX4_IMM %0, 0, 0 :: (dereferenceable invariant load (s128))
 name: merge_s_buffer_load_x3_x1
 tracksRegLiveness: true
 body:             |
   bb.0:
     liveins: $sgpr0_sgpr1_sgpr2_sgpr3
 
+    ; CHECK-LABEL: name: merge_s_buffer_load_x3_x1
+    ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+    ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s128))
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_96 = COPY [[S_BUFFER_LOAD_DWORDX4_IMM]].sub0_sub1_sub2
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[S_BUFFER_LOAD_DWORDX4_IMM]].sub3
+    ; CHECK-NEXT: S_ENDPGM 0
     %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
     %1:sgpr_96 = S_BUFFER_LOAD_DWORDX3_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s96))
     %2:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 12, 0 :: (dereferenceable invariant load (s32))
@@ -103,14 +160,53 @@ body:             |
 ...
 ---
 
-# CHECK-LABEL: name: merge_s_buffer_load_x8
-# CHECK: S_BUFFER_LOAD_DWORDX8_IMM %0, 0, 0 :: (dereferenceable invariant load (s256), align 4)
 name: merge_s_buffer_load_x8
 tracksRegLiveness: true
 body:             |
   bb.0:
     liveins: $sgpr0_sgpr1_sgpr2_sgpr3
 
+    ; GFX10-LABEL: name: merge_s_buffer_load_x8
+    ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+    ; GFX10-NEXT: [[S_BUFFER_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 4)
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY [[S_BUFFER_LOAD_DWORDX8_IMM]].sub0_sub1_sub2_sub3
+    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed [[S_BUFFER_LOAD_DWORDX8_IMM]].sub4_sub5_sub6_sub7
+    ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[COPY1]].sub0_sub1
+    ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_64_xexec = COPY killed [[COPY1]].sub2_sub3
+    ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY3]].sub0
+    ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY3]].sub1
+    ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY4]].sub0
+    ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY4]].sub1
+    ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_64_xexec = COPY [[COPY2]].sub0_sub1
+    ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_64_xexec = COPY killed [[COPY2]].sub2_sub3
+    ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY9]].sub0
+    ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY9]].sub1
+    ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY10]].sub0
+    ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY10]].sub1
+    ; GFX10-NEXT: S_ENDPGM 0
+    ;
+    ; GFX12-LABEL: name: merge_s_buffer_load_x8
+    ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+    ; GFX12-NEXT: {{  $}}
+    ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+    ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 4)
+    ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY [[S_BUFFER_LOAD_DWORDX8_IMM]].sub0_sub1_sub2_sub3
+    ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed [[S_BUFFER_LOAD_DWORDX8_IMM]].sub4_sub5_sub6_sub7
+    ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr_96 = COPY [[COPY1]].sub0_sub1_sub2
+    ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY1]].sub3
+    ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_64_xexec = COPY [[COPY3]].sub0_sub1
+    ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY3]].sub2
+    ; GFX12-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY5]].sub0
+    ; GFX12-NEXT: [[COPY8:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY5]].sub1
+    ; GFX12-NEXT: [[COPY9:%[0-9]+]]:sgpr_96 = COPY [[COPY2]].sub0_sub1_sub2
+    ; GFX12-NEXT: [[COPY10:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY2]].sub3
+    ; GFX12-NEXT: [[COPY11:%[0-9]+]]:sreg_64_xexec = COPY [[COPY9]].sub0_sub1
+    ; GFX12-NEXT: [[COPY12:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY9]].sub2
+    ; GFX12-NEXT: [[COPY13:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY11]].sub0
+    ; GFX12-NEXT: [[COPY14:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY11]].sub1
+    ; GFX12-NEXT: S_ENDPGM 0
     %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
     %1:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s32))
     %2:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 4, 0 :: (dereferenceable invariant load (s32))
@@ -125,14 +221,53 @@ body:             |
 ...
 ---
 
-# CHECK-LABEL: name: merge_s_buffer_load_x8_reordered
-# CHECK: S_BUFFER_LOAD_DWORDX8_IMM %0, 0, 0 :: (dereferenceable invariant load (s256), align 4)
 name: merge_s_buffer_load_x8_reordered
 tracksRegLiveness: true
 body:             |
   bb.0:
     liveins: $sgpr0_sgpr1_sgpr2_sgpr3
 
+    ; GFX10-LABEL: name: merge_s_buffer_load_x8_reordered
+    ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+    ; GFX10-NEXT: [[S_BUFFER_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 4)
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY [[S_BUFFER_LOAD_DWORDX8_IMM]].sub4_sub5_sub6_sub7
+    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed [[S_BUFFER_LOAD_DWORDX8_IMM]].sub0_sub1_sub2_sub3
+    ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[COPY1]].sub0_sub1
+    ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_64_xexec = COPY killed [[COPY1]].sub2_sub3
+    ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY3]].sub1
+    ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY3]].sub0
+    ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_64_xexec = COPY [[COPY2]].sub0_sub1
+    ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_64_xexec = COPY killed [[COPY2]].sub2_sub3
+    ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY7]].sub1
+    ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY7]].sub0
+    ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY4]].sub1
+    ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY4]].sub0
+    ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY8]].sub1
+    ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY8]].sub0
+    ; GFX10-NEXT: S_ENDPGM 0
+    ;
+    ; GFX12-LABEL: name: merge_s_buffer_load_x8_reordered
+    ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+    ; GFX12-NEXT: {{  $}}
+    ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+    ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 4)
+    ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY [[S_BUFFER_LOAD_DWORDX8_IMM]].sub4_sub5_sub6_sub7
+    ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed [[S_BUFFER_LOAD_DWORDX8_IMM]].sub0_sub1_sub2_sub3
+    ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr_96 = COPY [[COPY1]].sub0_sub1_sub2
+    ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY1]].sub3
+    ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_64_xexec = COPY [[COPY3]].sub0_sub1
+    ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY3]].sub2
+    ; GFX12-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY5]].sub1
+    ; GFX12-NEXT: [[COPY8:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY5]].sub0
+    ; GFX12-NEXT: [[COPY9:%[0-9]+]]:sgpr_96 = COPY [[COPY2]].sub0_sub1_sub2
+    ; GFX12-NEXT: [[COPY10:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY2]].sub3
+    ; GFX12-NEXT: [[COPY11:%[0-9]+]]:sreg_64_xexec = COPY [[COPY9]].sub0_sub1
+    ; GFX12-NEXT: [[COPY12:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY9]].sub2
+    ; GFX12-NEXT: [[COPY13:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY11]].sub1
+    ; GFX12-NEXT: [[COPY14:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY11]].sub0
+    ; GFX12-NEXT: S_ENDPGM 0
     %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
     %1:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 20, 0 :: (dereferenceable invariant load (s32))
     %2:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 4, 0 :: (dereferenceable invariant load (s32))
@@ -147,14 +282,24 @@ body:             |
 ...
 ---
 
-# CHECK-LABEL: name: merge_s_buffer_load_x8_out_of_x2
-# CHECK: S_BUFFER_LOAD_DWORDX8_IMM %0, 0, 0 :: (dereferenceable invariant load (s256), align 8)
 name: merge_s_buffer_load_x8_out_of_x2
 tracksRegLiveness: true
 body:             |
   bb.0:
     liveins: $sgpr0_sgpr1_sgpr2_sgpr3
 
+    ; CHECK-LABEL: name: merge_s_buffer_load_x8_out_of_x2
+    ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+    ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 8)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY [[S_BUFFER_LOAD_DWORDX8_IMM]].sub4_sub5_sub6_sub7
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed [[S_BUFFER_LOAD_DWORDX8_IMM]].sub0_sub1_sub2_sub3
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY [[COPY1]].sub0_sub1
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY killed [[COPY1]].sub2_sub3
+    ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY [[COPY2]].sub2_sub3
+    ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY killed [[COPY2]].sub0_sub1
+    ; CHECK-NEXT: S_ENDPGM 0
     %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
     %1:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM %0:sgpr_128, 16, 0 :: (dereferenceable invariant load (s64))
     %2:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM %0:sgpr_128, 8, 0 :: (dereferenceable invariant load (s64))
@@ -165,14 +310,20 @@ body:             |
 ...
 ---
 
-# CHECK-LABEL: name: merge_s_buffer_load_x8_out_of_x4
-# CHECK: S_BUFFER_LOAD_DWORDX8_IMM %0, 0, 0 :: (dereferenceable invariant load (s256), align 16)
 name: merge_s_buffer_load_x8_out_of_x4
 tracksRegLiveness: true
 body:             |
   bb.0:
     liveins: $sgpr0_sgpr1_sgpr2_sgpr3
 
+    ; CHECK-LABEL: name: merge_s_buffer_load_x8_out_of_x4
+    ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+    ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 16)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY [[S_BUFFER_LOAD_DWORDX8_IMM]].sub0_sub1_sub2_sub3
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed [[S_BUFFER_LOAD_DWORDX8_IMM]].sub4_sub5_sub6_sub7
+    ; CHECK-NEXT: S_ENDPGM 0
     %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
     %1:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s128))
     %2:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM %0:sgpr_128, 16, 0 :: (dereferenceable invariant load (s128))
@@ -181,14 +332,24 @@ body:             |
 ...
 ---
 
-# CHECK-LABEL: name: merge_s_buffer_load_x8_mixed
-# CHECK: S_BUFFER_LOAD_DWORDX8_IMM %0, 0, 0 :: (dereferenceable invariant load (s256), align 16)
 name: merge_s_buffer_load_x8_mixed
 tracksRegLiveness: true
 body:             |
   bb.0:
     liveins: $sgpr0_sgpr1_sgpr2_sgpr3
 
+    ; CHECK-LABEL: name: merge_s_buffer_load_x8_mixed
+    ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+    ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 16)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY [[S_BUFFER_LOAD_DWORDX8_IMM]].sub0_sub1_sub2_sub3
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed [[S_BUFFER_LOAD_DWORDX8_IMM]].sub4_sub5_sub6_sub7
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[COPY2]].sub0_sub1
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY killed [[COPY2]].sub2_sub3
+    ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY3]].sub0
+    ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY3]].sub1
+    ; CHECK-NEXT: S_ENDPGM 0
     %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
     %1:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s128))
     %2:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 16, 0 :: (dereferenceable invariant load (s32))
@@ -199,14 +360,39 @@ body:             |
 ...
 ---
 
-# CHECK-LABEL: name: merge_s_buffer_load_sgpr_imm
-# CHECK: S_BUFFER_LOAD_DWORDX4_SGPR_IMM %0, %1, 0, 0 :: (dereferenceable invariant load (s128), align 4)
 name: merge_s_buffer_load_sgpr_imm
 tracksRegLiveness: true
 body:             |
   bb.0:
     liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4
 
+    ; GFX10-LABEL: name: merge_s_buffer_load_sgpr_imm
+    ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4
+    ; GFX10-NEXT: [[S_BUFFER_LOAD_DWORDX4_SGPR_IMM:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR_IMM [[COPY]], [[COPY1]], 0, 0 :: (dereferenceable invariant load (s128), align 4)
+    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_64_xexec = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR_IMM]].sub0_sub1
+    ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY killed [[S_BUFFER_LOAD_DWORDX4_SGPR_IMM]].sub2_sub3
+    ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY2]].sub0
+    ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY2]].sub1
+    ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY3]].sub0
+    ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY3]].sub1
+    ; GFX10-NEXT: S_ENDPGM 0
+    ;
+    ; GFX12-LABEL: name: merge_s_buffer_load_sgpr_imm
+    ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4
+    ; GFX12-NEXT: {{  $}}
+    ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+    ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4
+    ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX4_SGPR_IMM:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR_IMM [[COPY]], [[COPY1]], 0, 0 :: (dereferenceable invariant load (s128), align 4)
+    ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr_96 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR_IMM]].sub0_sub1_sub2
+    ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[S_BUFFER_LOAD_DWORDX4_SGPR_IMM]].sub3
+    ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_64_xexec = COPY [[COPY2]].sub0_sub1
+    ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY2]].sub2
+    ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY4]].sub0
+    ; GFX12-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY4]].sub1
+    ; GFX12-NEXT: S_ENDPGM 0
     %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
     %1:sreg_32 = COPY $sgpr4
     %2:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM %0:sgpr_128, %1:sreg_32, 0, 0 :: (dereferenceable invariant load (s32))
@@ -218,15 +404,21 @@ body:             |
 ...
 ---
 
-# CHECK-LABEL: name: no_merge_for_different_soffsets
-# CHECK: S_BUFFER_LOAD_DWORD_SGPR_IMM %0, %1, 4, 0 :: (dereferenceable invariant load (s32))
-# CHECK: S_BUFFER_LOAD_DWORD_SGPR_IMM %0, %2, 8, 0 :: (dereferenceable invariant load (s32))
 name: no_merge_for_different_soffsets
 tracksRegLiveness: true
 body:             |
   bb.0:
     liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, $sgpr5
 
+    ; CHECK-LABEL: name: no_merge_for_different_soffsets
+    ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, $sgpr5
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr5
+    ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[COPY]], [[COPY1]], 4, 0 :: (dereferenceable invariant load (s32))
+    ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[COPY]], [[COPY2]], 8, 0 :: (dereferenceable invariant load (s32))
+    ; CHECK-NEXT: S_ENDPGM 0
     %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
     %1:sreg_32 = COPY $sgpr4
     %2:sreg_32 = COPY $sgpr5
@@ -237,15 +429,20 @@ body:             |
 ...
 ---
 
-# CHECK-LABEL: name: no_merge_for_non_adjacent_offsets
-# CHECK: S_BUFFER_LOAD_DWORD_SGPR_IMM %0, %1, 4, 0 :: (dereferenceable invariant load (s32))
-# CHECK: S_BUFFER_LOAD_DWORD_SGPR_IMM %0, %1, 12, 0 :: (dereferenceable invariant load (s32))
 name: no_merge_for_non_adjacent_offsets
 tracksRegLiveness: true
 body:             |
   bb.0:
     liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4
 
+    ; CHECK-LABEL: name: no_merge_for_non_adjacent_offsets
+    ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4
+    ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[COPY]], [[COPY1]], 4, 0 :: (dereferenceable invariant load (s32))
+    ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[COPY]], [[COPY1]], 12, 0 :: (dereferenceable invariant load (s32))
+    ; CHECK-NEXT: S_ENDPGM 0
     %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
     %1:sreg_32 = COPY $sgpr4
     %2:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM %0:sgpr_128, %1:sreg_32, 4, 0 :: (dereferenceable invariant load (s32))



More information about the llvm-commits mailing list