[llvm] e4284a7 - [AMDGPU] 4-align SGPR triples
Jay Foad via llvm-commits
llvm-commits at lists.llvm.org
Fri May 26 00:20:30 PDT 2023
Author: Jay Foad
Date: 2023-05-26T08:06:25+01:00
New Revision: e4284a7c70cd6af818922cbe9722940fa2134ec0
URL: https://github.com/llvm/llvm-project/commit/e4284a7c70cd6af818922cbe9722940fa2134ec0
DIFF: https://github.com/llvm/llvm-project/commit/e4284a7c70cd6af818922cbe9722940fa2134ec0.diff
LOG: [AMDGPU] 4-align SGPR triples
Previously, SGPR triples like s[3:5] were aligned on a 3-SGPR boundary,
which has no basis in hardware.
Aligning them on a 4-SGPR boundary is at least justified by the
architecture reference guide which says: "Quad-alignment of SGPRs is
required for operation on more than 64-bits".
Currently there are no instructions that take SGPR triples as operands,
so the issue is latent.
Differential Revision: https://reviews.llvm.org/D151463
Added:
Modified:
llvm/lib/Target/AMDGPU/SIRegisterInfo.td
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-trunc.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-unmerge-values.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.96.ll
llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
llvm/test/CodeGen/AMDGPU/copy-overlap-sgpr-kill.mir
llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll
llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll
llvm/test/CodeGen/AMDGPU/sgpr-phys-copy.mir
llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index adc44b5f1d1cd..b2b1b458a63af 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -413,7 +413,7 @@ def SGPR_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
def SGPR_64Regs : SIRegisterTuples<getSubRegs<2>.ret, SGPR_32, 105, 2, 2, "s">;
// SGPR 96-bit registers. No operations use these, but for symmetry with 96-bit VGPRs.
-def SGPR_96Regs : SIRegisterTuples<getSubRegs<3>.ret, SGPR_32, 105, 3, 3, "s">;
+def SGPR_96Regs : SIRegisterTuples<getSubRegs<3>.ret, SGPR_32, 105, 4, 3, "s">;
// SGPR 128-bit registers
def SGPR_128Regs : SIRegisterTuples<getSubRegs<4>.ret, SGPR_32, 105, 4, 4, "s">;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract.mir
index 28836016640cc..f1a91058044a8 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract.mir
@@ -189,14 +189,11 @@ body: |
; CHECK-LABEL: name: extract_sgpr_s96_from_s128
; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_128_with_sub1_sub2_sub3 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_128_with_sub0_sub1_sub2 = COPY [[COPY]]
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_96 = COPY [[COPY1]].sub0_sub1_sub2
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_96 = COPY [[COPY]].sub1_sub2_sub3
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY2]], implicit [[COPY3]]
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_96 = COPY [[COPY]].sub0_sub1_sub2
+ ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY1]]
%0:sgpr(s128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
%1:sgpr(s96) = G_EXTRACT %0, 0
- %2:sgpr(s96) = G_EXTRACT %0, 32
- S_ENDPGM 0, implicit %1, implicit %2
+ S_ENDPGM 0, implicit %1
...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.mir
index b563d4e1a9207..b6331fa01bce9 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.mir
@@ -240,7 +240,7 @@ body: |
; CHECK-LABEL: name: insert_s_s96_s_s64_0
; CHECK: liveins: $sgpr0_sgpr1_sgpr2, $sgpr4_sgpr5
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_96_with_sub0_sub1 = COPY $sgpr0_sgpr1_sgpr2
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_96 = COPY $sgpr0_sgpr1_sgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5
; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:sgpr_96 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub0_sub1
; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
@@ -252,28 +252,6 @@ body: |
---
-name: insert_s_s96_s_s64_32
-legalized: true
-regBankSelected: true
-
-body: |
- bb.0:
- liveins: $sgpr0_sgpr1_sgpr2, $sgpr4_sgpr5
- ; CHECK-LABEL: name: insert_s_s96_s_s64_32
- ; CHECK: liveins: $sgpr0_sgpr1_sgpr2, $sgpr4_sgpr5
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_96_with_sub1_sub2 = COPY $sgpr0_sgpr1_sgpr2
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:sgpr_96 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub1_sub2
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
- %0:sgpr(s96) = COPY $sgpr0_sgpr1_sgpr2
- %1:sgpr(s64) = COPY $sgpr4_sgpr5
- %2:sgpr(s96) = G_INSERT %0, %1, 32
- S_ENDPGM 0, implicit %2
-...
-
----
-
name: insert_s_s128_s_s64_0
legalized: true
regBankSelected: true
@@ -398,110 +376,44 @@ regBankSelected: true
body: |
bb.0:
- liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr6_sgpr7_sgpr8
+ liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6
; CHECK-LABEL: name: insert_s_s128_s_s96_0
- ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr6_sgpr7_sgpr8
+ ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_128_with_sub0_sub1_sub2 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_96 = COPY $sgpr6_sgpr7_sgpr8
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_96 = COPY $sgpr4_sgpr5_sgpr6
; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:sgpr_128 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub0_sub1_sub2
; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
%0:sgpr(s128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- %1:sgpr(s96) = COPY $sgpr6_sgpr7_sgpr8
+ %1:sgpr(s96) = COPY $sgpr4_sgpr5_sgpr6
%2:sgpr(s128) = G_INSERT %0, %1, 0
S_ENDPGM 0, implicit %2
...
---
-name: insert_s_s128_s_s96_32
-legalized: true
-regBankSelected: true
-
-body: |
- bb.0:
- liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr6_sgpr7_sgpr8
- ; CHECK-LABEL: name: insert_s_s128_s_s96_32
- ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr6_sgpr7_sgpr8
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_128_with_sub1_sub2_sub3 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_96 = COPY $sgpr6_sgpr7_sgpr8
- ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:sgpr_128 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub1_sub2_sub3
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
- %0:sgpr(s128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- %1:sgpr(s96) = COPY $sgpr6_sgpr7_sgpr8
- %2:sgpr(s128) = G_INSERT %0, %1, 32
- S_ENDPGM 0, implicit %2
-...
-
----
-
name: insert_s_s160_s_s96_0
legalized: true
regBankSelected: true
body: |
bb.0:
- liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4, $sgpr6_sgpr7_sgpr8
+ liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4, $sgpr8_sgpr9_sgpr10
; CHECK-LABEL: name: insert_s_s160_s_s96_0
- ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4, $sgpr6_sgpr7_sgpr8
+ ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4, $sgpr8_sgpr9_sgpr10
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_160_with_sub0_sub1_sub2 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_96 = COPY $sgpr6_sgpr7_sgpr8
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_160 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_96 = COPY $sgpr8_sgpr9_sgpr10
; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:sgpr_160 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub0_sub1_sub2
; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
%0:sgpr(s160) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4
- %1:sgpr(s96) = COPY $sgpr6_sgpr7_sgpr8
+ %1:sgpr(s96) = COPY $sgpr8_sgpr9_sgpr10
%2:sgpr(s160) = G_INSERT %0, %1, 0
S_ENDPGM 0, implicit %2
...
---
-name: insert_s_s160_s_s96_32
-legalized: true
-regBankSelected: true
-
-body: |
- bb.0:
- liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4, $sgpr6_sgpr7_sgpr8
- ; CHECK-LABEL: name: insert_s_s160_s_s96_32
- ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4, $sgpr6_sgpr7_sgpr8
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_160_with_sub1_sub2_sub3 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_96 = COPY $sgpr6_sgpr7_sgpr8
- ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:sgpr_160 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub1_sub2_sub3
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
- %0:sgpr(s160) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4
- %1:sgpr(s96) = COPY $sgpr6_sgpr7_sgpr8
- %2:sgpr(s160) = G_INSERT %0, %1, 32
- S_ENDPGM 0, implicit %2
-...
-
----
-
-name: insert_s_s160_s_s96_64
-legalized: true
-regBankSelected: true
-
-body: |
- bb.0:
- liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4, $sgpr6_sgpr7_sgpr8
- ; CHECK-LABEL: name: insert_s_s160_s_s96_64
- ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4, $sgpr6_sgpr7_sgpr8
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_160_with_sub2_sub3_sub4 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_96 = COPY $sgpr6_sgpr7_sgpr8
- ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:sgpr_160 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub2_sub3_sub4
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
- %0:sgpr(s160) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4
- %1:sgpr(s96) = COPY $sgpr6_sgpr7_sgpr8
- %2:sgpr(s160) = G_INSERT %0, %1, 64
- S_ENDPGM 0, implicit %2
-...
-
----
-
name: insert_s_s256_s_s128_0
legalized: true
regBankSelected: true
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-trunc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-trunc.mir
index d0258bdb0cd44..7e6da3b730bdf 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-trunc.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-trunc.mir
@@ -125,7 +125,7 @@ body: |
; GCN-LABEL: name: trunc_sgpr_s96_to_s64
; GCN: liveins: $sgpr0_sgpr1_sgpr2
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_96_with_sub0_sub1 = COPY $sgpr0_sgpr1_sgpr2
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_96 = COPY $sgpr0_sgpr1_sgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY [[COPY]].sub0_sub1
; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]]
%0:sgpr(s96) = COPY $sgpr0_sgpr1_sgpr2
@@ -163,7 +163,7 @@ body: |
; GCN-LABEL: name: trunc_sgpr_s128_to_s96
; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_128_with_sub0_sub1_sub2 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_96 = COPY [[COPY]].sub0_sub1_sub2
; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]]
%0:sgpr(s128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-unmerge-values.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-unmerge-values.mir
index 46ff290848f28..bec5f646b7839 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-unmerge-values.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-unmerge-values.mir
@@ -282,34 +282,33 @@ tracksRegLiveness: true
body: |
bb.0:
- liveins: $sgpr0_sgpr1_sgpr2, $sgpr3_sgpr4_sgpr5, $sgpr6_sgpr7_sgpr8, $sgpr9_sgpr10_sgpr11
+ liveins: $sgpr0_sgpr1_sgpr2, $sgpr4_sgpr5_sgpr6, $sgpr8_sgpr9_sgpr10, $sgpr12_sgpr13_sgpr14
; GCN-LABEL: name: test_unmerge_s_v3s32_s_v12s32
- ; GCN: liveins: $sgpr0_sgpr1_sgpr2, $sgpr3_sgpr4_sgpr5, $sgpr6_sgpr7_sgpr8, $sgpr9_sgpr10_sgpr11
+ ; GCN: liveins: $sgpr0_sgpr1_sgpr2, $sgpr4_sgpr5_sgpr6, $sgpr8_sgpr9_sgpr10, $sgpr12_sgpr13_sgpr14
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_96 = COPY $sgpr0_sgpr1_sgpr2
- ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_96 = COPY $sgpr3_sgpr4_sgpr5
- ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_96 = COPY $sgpr6_sgpr7_sgpr8
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_96 = COPY $sgpr9_sgpr10_sgpr11
- ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_384_with_sub0_sub1_sub2 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1_sub2, [[COPY1]], %subreg.sub3_sub4_sub5, [[COPY2]], %subreg.sub6_sub7_sub8, [[COPY3]], %subreg.sub9_sub10_sub11
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE]].sub0_sub1_sub2
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE]].sub3_sub4_sub5
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE]].sub6_sub7_sub8
- ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE]].sub9_sub10_sub11
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2 = COPY [[COPY4]]
- ; GCN-NEXT: $sgpr3_sgpr4_sgpr5 = COPY [[COPY5]]
- ; GCN-NEXT: $sgpr6_sgpr7_sgpr8 = COPY [[COPY6]]
- ; GCN-NEXT: $sgpr9_sgpr10_sgpr11 = COPY [[COPY7]]
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr(<3 x s32>) = COPY $sgpr0_sgpr1_sgpr2
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr(<3 x s32>) = COPY $sgpr4_sgpr5_sgpr6
+ ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr(<3 x s32>) = COPY $sgpr8_sgpr9_sgpr10
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr(<3 x s32>) = COPY $sgpr12_sgpr13_sgpr14
+ ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:sgpr_384(<12 x s32>) = G_CONCAT_VECTORS [[COPY]](<3 x s32>), [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>)
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_96(<3 x s32>) = COPY [[CONCAT_VECTORS]].sub0_sub1_sub2(<12 x s32>)
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_96(<3 x s32>) = COPY [[CONCAT_VECTORS]].sub3_sub4_sub5(<12 x s32>)
+ ; GCN-NEXT: [[UV:%[0-9]+]]:sgpr_96(<3 x s32>), [[UV1:%[0-9]+]]:sgpr_96(<3 x s32>), [[UV2:%[0-9]+]]:sgpr_96(<3 x s32>), [[UV3:%[0-9]+]]:sgpr_96(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>)
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2 = COPY [[UV]](<3 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5_sgpr6 = COPY [[UV1]](<3 x s32>)
+ ; GCN-NEXT: $sgpr8_sgpr9_sgpr10 = COPY [[UV2]](<3 x s32>)
+ ; GCN-NEXT: $sgpr12_sgpr13_sgpr14 = COPY [[UV3]](<3 x s32>)
%0:sgpr(<3 x s32>) = COPY $sgpr0_sgpr1_sgpr2
- %1:sgpr(<3 x s32>) = COPY $sgpr3_sgpr4_sgpr5
- %2:sgpr(<3 x s32>) = COPY $sgpr6_sgpr7_sgpr8
- %3:sgpr(<3 x s32>) = COPY $sgpr9_sgpr10_sgpr11
+ %1:sgpr(<3 x s32>) = COPY $sgpr4_sgpr5_sgpr6
+ %2:sgpr(<3 x s32>) = COPY $sgpr8_sgpr9_sgpr10
+ %3:sgpr(<3 x s32>) = COPY $sgpr12_sgpr13_sgpr14
%4:sgpr(<12 x s32>) = G_CONCAT_VECTORS %0, %1, %2, %3
%5:sgpr(<3 x s32>), %6:sgpr(<3 x s32>), %7:sgpr(<3 x s32>), %8:sgpr(<3 x s32>) = G_UNMERGE_VALUES %4
$sgpr0_sgpr1_sgpr2 = COPY %5
- $sgpr3_sgpr4_sgpr5 = COPY %6
- $sgpr6_sgpr7_sgpr8 = COPY %7
- $sgpr9_sgpr10_sgpr11 = COPY %8
+ $sgpr4_sgpr5_sgpr6 = COPY %6
+ $sgpr8_sgpr9_sgpr10 = COPY %7
+ $sgpr12_sgpr13_sgpr14 = COPY %8
...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll
index 996ec6e8d64c5..fc5f953e02cf0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll
@@ -669,10 +669,10 @@ define amdgpu_kernel void @image_bvh_intersect_ray_nsa_reassign(ptr %p_node_ptr,
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b256 s[0:7], s[0:1], 0x24
; GFX11-NEXT: v_lshlrev_b32_e32 v4, 2, v0
-; GFX11-NEXT: s_mov_b32 s9, 0x40400000
+; GFX11-NEXT: s_mov_b32 s8, 0x40400000
; GFX11-NEXT: s_mov_b32 s12, 0x40c00000
-; GFX11-NEXT: s_mov_b32 s11, 0x40a00000
-; GFX11-NEXT: s_mov_b32 s10, 4.0
+; GFX11-NEXT: s_mov_b32 s10, 0x40a00000
+; GFX11-NEXT: s_mov_b32 s9, 4.0
; GFX11-NEXT: s_mov_b32 s14, 0x41000000
; GFX11-NEXT: s_mov_b32 s13, 0x40e00000
; GFX11-NEXT: v_mov_b32_e32 v6, s12
@@ -691,9 +691,9 @@ define amdgpu_kernel void @image_bvh_intersect_ray_nsa_reassign(ptr %p_node_ptr,
; GFX11-NEXT: flat_load_b32 v9, v[0:1]
; GFX11-NEXT: flat_load_b32 v10, v[2:3]
; GFX11-NEXT: s_mov_b32 s2, 2.0
-; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v3, s9
+; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v3, s8
; GFX11-NEXT: v_dual_mov_b32 v1, s1 :: v_dual_mov_b32 v2, s2
-; GFX11-NEXT: v_dual_mov_b32 v5, s11 :: v_dual_mov_b32 v4, s10
+; GFX11-NEXT: v_dual_mov_b32 v5, s10 :: v_dual_mov_b32 v4, s9
; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-NEXT: image_bvh_intersect_ray v[0:3], [v9, v10, v[0:2], v[3:5], v[6:8]], s[4:7]
; GFX11-NEXT: s_waitcnt vmcnt(0)
@@ -778,9 +778,9 @@ define amdgpu_kernel void @image_bvh_intersect_ray_a16_nsa_reassign(ptr %p_node_
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b256 s[0:7], s[0:1], 0x24
; GFX11-NEXT: v_lshlrev_b32_e32 v4, 2, v0
-; GFX11-NEXT: s_mov_b32 s9, 0x42004600
-; GFX11-NEXT: s_mov_b32 s10, 0x44004700
-; GFX11-NEXT: s_mov_b32 s11, 0x45004800
+; GFX11-NEXT: s_mov_b32 s8, 0x42004600
+; GFX11-NEXT: s_mov_b32 s9, 0x44004700
+; GFX11-NEXT: s_mov_b32 s10, 0x45004800
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: s_mov_b32 s1, 1.0
@@ -795,9 +795,9 @@ define amdgpu_kernel void @image_bvh_intersect_ray_a16_nsa_reassign(ptr %p_node_
; GFX11-NEXT: flat_load_b32 v6, v[0:1]
; GFX11-NEXT: flat_load_b32 v7, v[2:3]
; GFX11-NEXT: s_mov_b32 s2, 2.0
-; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v3, s9
+; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v3, s8
; GFX11-NEXT: v_dual_mov_b32 v1, s1 :: v_dual_mov_b32 v2, s2
-; GFX11-NEXT: v_dual_mov_b32 v5, s11 :: v_dual_mov_b32 v4, s10
+; GFX11-NEXT: v_dual_mov_b32 v5, s10 :: v_dual_mov_b32 v4, s9
; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-NEXT: image_bvh_intersect_ray v[0:3], [v6, v7, v[0:2], v[3:5]], s[4:7] a16
; GFX11-NEXT: s_waitcnt vmcnt(0)
@@ -887,31 +887,31 @@ define amdgpu_kernel void @image_bvh64_intersect_ray_nsa_reassign(ptr %p_ray, <4
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: s_load_b64 s[4:5], s[0:1], 0x24
; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x34
+; GFX11-NEXT: s_mov_b32 s16, 0xb36211c7
; GFX11-NEXT: v_lshlrev_b32_e32 v2, 2, v0
-; GFX11-NEXT: s_mov_b32 s8, 2.0
-; GFX11-NEXT: s_mov_b32 s7, 1.0
-; GFX11-NEXT: s_mov_b32 s6, 0
-; GFX11-NEXT: s_mov_b32 s9, 0x40400000
+; GFX11-NEXT: s_movk_i32 s17, 0x102
+; GFX11-NEXT: s_mov_b32 s8, 0x40400000
; GFX11-NEXT: s_mov_b32 s12, 0x40c00000
-; GFX11-NEXT: s_mov_b32 s11, 0x40a00000
-; GFX11-NEXT: s_mov_b32 s10, 4.0
+; GFX11-NEXT: s_mov_b32 s6, 2.0
+; GFX11-NEXT: s_mov_b32 s10, 0x40a00000
+; GFX11-NEXT: s_mov_b32 s9, 4.0
; GFX11-NEXT: s_mov_b32 s14, 0x41000000
; GFX11-NEXT: s_mov_b32 s13, 0x40e00000
; GFX11-NEXT: v_mov_b32_e32 v6, s12
-; GFX11-NEXT: v_dual_mov_b32 v8, s14 :: v_dual_mov_b32 v3, s9
-; GFX11-NEXT: v_dual_mov_b32 v4, s10 :: v_dual_mov_b32 v7, s13
+; GFX11-NEXT: v_dual_mov_b32 v8, s14 :: v_dual_mov_b32 v9, s16
+; GFX11-NEXT: v_dual_mov_b32 v3, s8 :: v_dual_mov_b32 v4, s9
+; GFX11-NEXT: v_mov_b32_e32 v7, s13
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: v_dual_mov_b32 v5, s11 :: v_dual_mov_b32 v0, s4
+; GFX11-NEXT: v_dual_mov_b32 v5, s10 :: v_dual_mov_b32 v0, s4
; GFX11-NEXT: v_mov_b32_e32 v1, s5
-; GFX11-NEXT: s_mov_b32 s4, 0xb36211c7
-; GFX11-NEXT: s_movk_i32 s5, 0x102
-; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX11-NEXT: v_dual_mov_b32 v10, s5 :: v_dual_mov_b32 v9, s4
+; GFX11-NEXT: s_mov_b32 s4, 0
+; GFX11-NEXT: s_mov_b32 s5, 1.0
+; GFX11-NEXT: v_mov_b32_e32 v10, s17
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX11-NEXT: flat_load_b32 v11, v[0:1]
-; GFX11-NEXT: v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7
-; GFX11-NEXT: v_mov_b32_e32 v2, s8
+; GFX11-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
+; GFX11-NEXT: v_mov_b32_e32 v2, s6
; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-NEXT: image_bvh64_intersect_ray v[0:3], [v[9:10], v11, v[0:2], v[3:5], v[6:8]], s[0:3]
; GFX11-NEXT: s_waitcnt vmcnt(0)
@@ -993,27 +993,25 @@ define amdgpu_kernel void @image_bvh64_intersect_ray_a16_nsa_reassign(ptr %p_ray
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: s_load_b64 s[4:5], s[0:1], 0x24
; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x34
+; GFX11-NEXT: s_mov_b32 s12, 0xb36211c6
; GFX11-NEXT: v_lshlrev_b32_e32 v2, 2, v0
-; GFX11-NEXT: s_mov_b32 s6, 0
-; GFX11-NEXT: s_mov_b32 s9, 0x42004600
-; GFX11-NEXT: s_mov_b32 s8, 2.0
-; GFX11-NEXT: s_mov_b32 s7, 1.0
-; GFX11-NEXT: s_mov_b32 s10, 0x44004700
-; GFX11-NEXT: s_mov_b32 s11, 0x45004800
-; GFX11-NEXT: v_dual_mov_b32 v3, s9 :: v_dual_mov_b32 v4, s10
+; GFX11-NEXT: s_movk_i32 s13, 0x102
+; GFX11-NEXT: s_mov_b32 s6, 2.0
+; GFX11-NEXT: s_mov_b32 s8, 0x42004600
+; GFX11-NEXT: s_mov_b32 s9, 0x44004700
+; GFX11-NEXT: s_mov_b32 s10, 0x45004800
+; GFX11-NEXT: v_dual_mov_b32 v3, s8 :: v_dual_mov_b32 v4, s9
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: v_dual_mov_b32 v5, s11 :: v_dual_mov_b32 v0, s4
+; GFX11-NEXT: v_dual_mov_b32 v5, s10 :: v_dual_mov_b32 v0, s4
; GFX11-NEXT: v_mov_b32_e32 v1, s5
-; GFX11-NEXT: s_mov_b32 s4, 0xb36211c6
-; GFX11-NEXT: s_movk_i32 s5, 0x102
-; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX11-NEXT: v_mov_b32_e32 v7, s5
+; GFX11-NEXT: s_mov_b32 s5, 1.0
+; GFX11-NEXT: s_mov_b32 s4, 0
+; GFX11-NEXT: v_dual_mov_b32 v6, s12 :: v_dual_mov_b32 v7, s13
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
-; GFX11-NEXT: v_mov_b32_e32 v6, s4
; GFX11-NEXT: flat_load_b32 v8, v[0:1]
-; GFX11-NEXT: v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7
-; GFX11-NEXT: v_mov_b32_e32 v2, s8
+; GFX11-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
+; GFX11-NEXT: v_mov_b32_e32 v2, s6
; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-NEXT: image_bvh64_intersect_ray v[0:3], [v[6:7], v8, v[0:2], v[3:5]], s[0:3] a16
; GFX11-NEXT: s_waitcnt vmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.96.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.96.ll
index 1857bc936957b..0f9ec965f2f0f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.96.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.96.ll
@@ -10,25 +10,25 @@
define amdgpu_kernel void @store_lds_v3i32(ptr addrspace(3) %out, <3 x i32> %x) {
; GFX9-LABEL: store_lds_v3i32:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x10
+; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10
; GFX9-NEXT: s_load_dword s2, s[0:1], 0x0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v0, s12
-; GFX9-NEXT: v_mov_b32_e32 v1, s13
-; GFX9-NEXT: v_mov_b32_e32 v2, s14
+; GFX9-NEXT: v_mov_b32_e32 v0, s4
+; GFX9-NEXT: v_mov_b32_e32 v1, s5
+; GFX9-NEXT: v_mov_b32_e32 v2, s6
; GFX9-NEXT: v_mov_b32_e32 v3, s2
; GFX9-NEXT: ds_write_b96 v3, v[0:2]
; GFX9-NEXT: s_endpgm
;
; GFX7-LABEL: store_lds_v3i32:
; GFX7: ; %bb.0:
-; GFX7-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x4
+; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x4
; GFX7-NEXT: s_load_dword s0, s[0:1], 0x0
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7-NEXT: v_mov_b32_e32 v0, s12
-; GFX7-NEXT: v_mov_b32_e32 v1, s13
-; GFX7-NEXT: v_mov_b32_e32 v2, s14
+; GFX7-NEXT: v_mov_b32_e32 v0, s4
+; GFX7-NEXT: v_mov_b32_e32 v1, s5
+; GFX7-NEXT: v_mov_b32_e32 v2, s6
; GFX7-NEXT: v_mov_b32_e32 v3, s0
; GFX7-NEXT: ds_write_b96 v3, v[0:2]
; GFX7-NEXT: s_endpgm
@@ -36,12 +36,12 @@ define amdgpu_kernel void @store_lds_v3i32(ptr addrspace(3) %out, <3 x i32> %x)
; GFX10-LABEL: store_lds_v3i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_clause 0x1
-; GFX10-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x10
+; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10
; GFX10-NEXT: s_load_dword s2, s[0:1], 0x0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: v_mov_b32_e32 v0, s12
-; GFX10-NEXT: v_mov_b32_e32 v1, s13
-; GFX10-NEXT: v_mov_b32_e32 v2, s14
+; GFX10-NEXT: v_mov_b32_e32 v0, s4
+; GFX10-NEXT: v_mov_b32_e32 v1, s5
+; GFX10-NEXT: v_mov_b32_e32 v2, s6
; GFX10-NEXT: v_mov_b32_e32 v3, s2
; GFX10-NEXT: ds_write_b96 v3, v[0:2]
; GFX10-NEXT: s_endpgm
@@ -49,11 +49,11 @@ define amdgpu_kernel void @store_lds_v3i32(ptr addrspace(3) %out, <3 x i32> %x)
; GFX11-LABEL: store_lds_v3i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_clause 0x1
-; GFX11-NEXT: s_load_b128 s[12:15], s[0:1], 0x10
+; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x10
; GFX11-NEXT: s_load_b32 s0, s[0:1], 0x0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: v_dual_mov_b32 v0, s12 :: v_dual_mov_b32 v1, s13
-; GFX11-NEXT: v_dual_mov_b32 v2, s14 :: v_dual_mov_b32 v3, s0
+; GFX11-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
+; GFX11-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s0
; GFX11-NEXT: ds_store_b96 v3, v[0:2]
; GFX11-NEXT: s_endpgm
store <3 x i32> %x, ptr addrspace(3) %out
@@ -447,25 +447,25 @@ define amdgpu_kernel void @store_lds_v3i32_align8(ptr addrspace(3) %out, <3 x i3
define amdgpu_kernel void @store_lds_v3i32_align16(ptr addrspace(3) %out, <3 x i32> %x) {
; GFX9-LABEL: store_lds_v3i32_align16:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x10
+; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10
; GFX9-NEXT: s_load_dword s2, s[0:1], 0x0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v0, s12
-; GFX9-NEXT: v_mov_b32_e32 v1, s13
-; GFX9-NEXT: v_mov_b32_e32 v2, s14
+; GFX9-NEXT: v_mov_b32_e32 v0, s4
+; GFX9-NEXT: v_mov_b32_e32 v1, s5
+; GFX9-NEXT: v_mov_b32_e32 v2, s6
; GFX9-NEXT: v_mov_b32_e32 v3, s2
; GFX9-NEXT: ds_write_b96 v3, v[0:2]
; GFX9-NEXT: s_endpgm
;
; GFX7-LABEL: store_lds_v3i32_align16:
; GFX7: ; %bb.0:
-; GFX7-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x4
+; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x4
; GFX7-NEXT: s_load_dword s0, s[0:1], 0x0
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7-NEXT: v_mov_b32_e32 v0, s12
-; GFX7-NEXT: v_mov_b32_e32 v1, s13
-; GFX7-NEXT: v_mov_b32_e32 v2, s14
+; GFX7-NEXT: v_mov_b32_e32 v0, s4
+; GFX7-NEXT: v_mov_b32_e32 v1, s5
+; GFX7-NEXT: v_mov_b32_e32 v2, s6
; GFX7-NEXT: v_mov_b32_e32 v3, s0
; GFX7-NEXT: ds_write_b96 v3, v[0:2]
; GFX7-NEXT: s_endpgm
@@ -473,12 +473,12 @@ define amdgpu_kernel void @store_lds_v3i32_align16(ptr addrspace(3) %out, <3 x i
; GFX10-LABEL: store_lds_v3i32_align16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_clause 0x1
-; GFX10-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x10
+; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10
; GFX10-NEXT: s_load_dword s2, s[0:1], 0x0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: v_mov_b32_e32 v0, s12
-; GFX10-NEXT: v_mov_b32_e32 v1, s13
-; GFX10-NEXT: v_mov_b32_e32 v2, s14
+; GFX10-NEXT: v_mov_b32_e32 v0, s4
+; GFX10-NEXT: v_mov_b32_e32 v1, s5
+; GFX10-NEXT: v_mov_b32_e32 v2, s6
; GFX10-NEXT: v_mov_b32_e32 v3, s2
; GFX10-NEXT: ds_write_b96 v3, v[0:2]
; GFX10-NEXT: s_endpgm
@@ -486,11 +486,11 @@ define amdgpu_kernel void @store_lds_v3i32_align16(ptr addrspace(3) %out, <3 x i
; GFX11-LABEL: store_lds_v3i32_align16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_clause 0x1
-; GFX11-NEXT: s_load_b128 s[12:15], s[0:1], 0x10
+; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x10
; GFX11-NEXT: s_load_b32 s0, s[0:1], 0x0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: v_dual_mov_b32 v0, s12 :: v_dual_mov_b32 v1, s13
-; GFX11-NEXT: v_dual_mov_b32 v2, s14 :: v_dual_mov_b32 v3, s0
+; GFX11-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
+; GFX11-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s0
; GFX11-NEXT: ds_store_b96 v3, v[0:2]
; GFX11-NEXT: s_endpgm
store <3 x i32> %x, ptr addrspace(3) %out, align 16
diff --git a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
index c64ed7f3d48b9..6a9ad2a0f6dad 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
@@ -45,7 +45,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.2:
; GFX90A-NEXT: successors: %bb.3(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr24, $sgpr33, $vgpr31, $agpr0, $vgpr26, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8, $sgpr9, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr58, $sgpr59, $sgpr21_sgpr22_sgpr23, $sgpr24_sgpr25_sgpr26, $sgpr26_sgpr27, $vgpr2, $vgpr3, $vgpr20, $vgpr22
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr24, $sgpr33, $vgpr31, $agpr0, $vgpr26, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8, $sgpr9, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr58, $sgpr59, $sgpr21, $sgpr22_sgpr23, $sgpr24_sgpr25_sgpr26, $sgpr26_sgpr27, $vgpr2, $vgpr3, $vgpr20, $vgpr22
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $sgpr17 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $sgpr20 = IMPLICIT_DEF
@@ -578,7 +578,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.44:
; GFX90A-NEXT: successors: %bb.45(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr58, $vgpr57, $vgpr20, $vgpr61, $vgpr31, $vgpr63, $agpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8, $sgpr9, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $vgpr40, $vgpr62, $vgpr60, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr20_sgpr21, $sgpr21_sgpr22_sgpr23, $sgpr24_sgpr25_sgpr26, $sgpr26_sgpr27, $vgpr56, $vgpr47, $vgpr2, $vgpr3, $vgpr4, $vgpr46, $vgpr45, $vgpr44, $vgpr43, $vgpr42, $vgpr41, $vgpr14
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr58, $vgpr57, $vgpr20, $vgpr61, $vgpr31, $vgpr63, $agpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8, $sgpr9, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $vgpr40, $vgpr62, $vgpr60, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr20_sgpr21_sgpr22, $sgpr22_sgpr23, $sgpr24_sgpr25_sgpr26, $sgpr26_sgpr27, $vgpr56, $vgpr47, $vgpr2, $vgpr3, $vgpr4, $vgpr46, $vgpr45, $vgpr44, $vgpr43, $vgpr42, $vgpr41, $vgpr14
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $sgpr50_sgpr51 = COPY renamable $sgpr36_sgpr37
; GFX90A-NEXT: renamable $vgpr10_vgpr11 = IMPLICIT_DEF
diff --git a/llvm/test/CodeGen/AMDGPU/copy-overlap-sgpr-kill.mir b/llvm/test/CodeGen/AMDGPU/copy-overlap-sgpr-kill.mir
index b97a9237a0b97..46a72c032827c 100644
--- a/llvm/test/CodeGen/AMDGPU/copy-overlap-sgpr-kill.mir
+++ b/llvm/test/CodeGen/AMDGPU/copy-overlap-sgpr-kill.mir
@@ -32,17 +32,16 @@ name: nonoverlapping_copy_kill
tracksRegLiveness: true
body: |
bb.0:
- liveins: $sgpr30_sgpr31, $sgpr3_sgpr4_sgpr5
+ liveins: $sgpr30_sgpr31, $sgpr4_sgpr5_sgpr6
; CHECK-LABEL: name: nonoverlapping_copy_kill
- ; CHECK: liveins: $sgpr30_sgpr31, $sgpr3_sgpr4_sgpr5
+ ; CHECK: liveins: $sgpr30_sgpr31, $sgpr4_sgpr5_sgpr6
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: $sgpr0 = S_MOV_B32 $sgpr3, implicit $sgpr3_sgpr4_sgpr5, implicit-def $sgpr0_sgpr1_sgpr2
- ; CHECK-NEXT: $sgpr1 = S_MOV_B32 $sgpr4, implicit $sgpr3_sgpr4_sgpr5
- ; CHECK-NEXT: $sgpr2 = S_MOV_B32 $sgpr5, implicit killed $sgpr3_sgpr4_sgpr5
+ ; CHECK-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $sgpr4_sgpr5, implicit $sgpr4_sgpr5_sgpr6, implicit-def $sgpr0_sgpr1_sgpr2
+ ; CHECK-NEXT: $sgpr2 = S_MOV_B32 $sgpr6, implicit killed $sgpr4_sgpr5_sgpr6
; CHECK-NEXT: renamable $sgpr1 = S_ADD_I32 0, $sgpr1, implicit-def $scc
; CHECK-NEXT: S_SETPC_B64 $sgpr30_sgpr31, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2
- renamable $sgpr0_sgpr1_sgpr2 = COPY killed renamable $sgpr3_sgpr4_sgpr5
+ renamable $sgpr0_sgpr1_sgpr2 = COPY killed renamable $sgpr4_sgpr5_sgpr6
renamable $sgpr1 = S_ADD_I32 0, $sgpr1, implicit-def $scc
S_SETPC_B64 $sgpr30_sgpr31, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2
diff --git a/llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll b/llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll
index 6e95091a6b98c..03f7d4fd8fd03 100644
--- a/llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll
@@ -8,15 +8,15 @@
define amdgpu_kernel void @s_input_output_i128() {
; GFX908-LABEL: name: s_input_output_i128
; GFX908: bb.0 (%ir-block.0):
- ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 7143434 /* regdef:SGPR_128 */, def %4
+ ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6881290 /* regdef:SGPR_128 */, def %4
; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY %4
- ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 7143433 /* reguse:SGPR_128 */, [[COPY]]
+ ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6881289 /* reguse:SGPR_128 */, [[COPY]]
; GFX908-NEXT: S_ENDPGM 0
; GFX90A-LABEL: name: s_input_output_i128
; GFX90A: bb.0 (%ir-block.0):
- ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 7143434 /* regdef:SGPR_128 */, def %4
+ ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6881290 /* regdef:SGPR_128 */, def %4
; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY %4
- ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 7143433 /* reguse:SGPR_128 */, [[COPY]]
+ ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6881289 /* reguse:SGPR_128 */, [[COPY]]
; GFX90A-NEXT: S_ENDPGM 0
%val = tail call i128 asm sideeffect "; def $0", "=s"()
call void asm sideeffect "; use $0", "s"(i128 %val)
@@ -26,15 +26,15 @@ define amdgpu_kernel void @s_input_output_i128() {
define amdgpu_kernel void @v_input_output_i128() {
; GFX908-LABEL: name: v_input_output_i128
; GFX908: bb.0 (%ir-block.0):
- ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6094858 /* regdef:VReg_128 */, def %4
+ ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5767178 /* regdef:VReg_128 */, def %4
; GFX908-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY %4
- ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6094857 /* reguse:VReg_128 */, [[COPY]]
+ ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5767177 /* reguse:VReg_128 */, [[COPY]]
; GFX908-NEXT: S_ENDPGM 0
; GFX90A-LABEL: name: v_input_output_i128
; GFX90A: bb.0 (%ir-block.0):
- ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6422538 /* regdef:VReg_128_Align2 */, def %4
+ ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6094858 /* regdef:VReg_128_Align2 */, def %4
; GFX90A-NEXT: [[COPY:%[0-9]+]]:vreg_128_align2 = COPY %4
- ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6422537 /* reguse:VReg_128_Align2 */, [[COPY]]
+ ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6094857 /* reguse:VReg_128_Align2 */, [[COPY]]
; GFX90A-NEXT: S_ENDPGM 0
%val = tail call i128 asm sideeffect "; def $0", "=v"()
call void asm sideeffect "; use $0", "v"(i128 %val)
@@ -44,15 +44,15 @@ define amdgpu_kernel void @v_input_output_i128() {
define amdgpu_kernel void @a_input_output_i128() {
; GFX908-LABEL: name: a_input_output_i128
; GFX908: bb.0 (%ir-block.0):
- ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6029322 /* regdef:AReg_128 */, def %4
+ ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5701642 /* regdef:AReg_128 */, def %4
; GFX908-NEXT: [[COPY:%[0-9]+]]:areg_128 = COPY %4
- ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6029321 /* reguse:AReg_128 */, [[COPY]]
+ ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5701641 /* reguse:AReg_128 */, [[COPY]]
; GFX908-NEXT: S_ENDPGM 0
; GFX90A-LABEL: name: a_input_output_i128
; GFX90A: bb.0 (%ir-block.0):
- ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6291466 /* regdef:AReg_128_Align2 */, def %4
+ ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5963786 /* regdef:AReg_128_Align2 */, def %4
; GFX90A-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY %4
- ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6291465 /* reguse:AReg_128_Align2 */, [[COPY]]
+ ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5963785 /* reguse:AReg_128_Align2 */, [[COPY]]
; GFX90A-NEXT: S_ENDPGM 0
%val = call i128 asm sideeffect "; def $0", "=a"()
call void asm sideeffect "; use $0", "a"(i128 %val)
diff --git a/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll b/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll
index 8ae07a74503a2..258c1ecfcbc9f 100644
--- a/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll
+++ b/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll
@@ -11,7 +11,7 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
; REGALLOC-GFX908-NEXT: liveins: $sgpr4_sgpr5
; REGALLOC-GFX908-NEXT: {{ $}}
; REGALLOC-GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1703945 /* reguse:AGPR_32 */, undef %5:agpr_32
- ; REGALLOC-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6094858 /* regdef:VReg_128 */, def %26
+ ; REGALLOC-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5767178 /* regdef:VReg_128 */, def %26
; REGALLOC-GFX908-NEXT: [[COPY:%[0-9]+]]:av_128 = COPY %26
; REGALLOC-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3080202 /* regdef:VReg_64 */, def %23
; REGALLOC-GFX908-NEXT: SI_SPILL_V64_SAVE %23, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5)
@@ -35,7 +35,7 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
; PEI-GFX908-NEXT: $sgpr8 = S_ADD_U32 $sgpr8, $sgpr7, implicit-def $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
; PEI-GFX908-NEXT: $sgpr9 = S_ADDC_U32 $sgpr9, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
; PEI-GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1703945 /* reguse:AGPR_32 */, undef renamable $agpr0
- ; PEI-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6094858 /* regdef:VReg_128 */, def renamable $vgpr0_vgpr1_vgpr2_vgpr3
+ ; PEI-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5767178 /* regdef:VReg_128 */, def renamable $vgpr0_vgpr1_vgpr2_vgpr3
; PEI-GFX908-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = COPY killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, implicit $exec
; PEI-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3080202 /* regdef:VReg_64 */, def renamable $vgpr0_vgpr1
; PEI-GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, 0, 4, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.0, addrspace 5)
@@ -58,7 +58,7 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
; REGALLOC-GFX90A-NEXT: liveins: $sgpr4_sgpr5
; REGALLOC-GFX90A-NEXT: {{ $}}
; REGALLOC-GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1703945 /* reguse:AGPR_32 */, undef %5:agpr_32
- ; REGALLOC-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6422538 /* regdef:VReg_128_Align2 */, def %25
+ ; REGALLOC-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6094858 /* regdef:VReg_128_Align2 */, def %25
; REGALLOC-GFX90A-NEXT: [[COPY:%[0-9]+]]:av_128_align2 = COPY %25
; REGALLOC-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3407882 /* regdef:VReg_64_Align2 */, def %23
; REGALLOC-GFX90A-NEXT: SI_SPILL_V64_SAVE %23, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5)
@@ -80,7 +80,7 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
; PEI-GFX90A-NEXT: $sgpr8 = S_ADD_U32 $sgpr8, $sgpr7, implicit-def $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
; PEI-GFX90A-NEXT: $sgpr9 = S_ADDC_U32 $sgpr9, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
; PEI-GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1703945 /* reguse:AGPR_32 */, undef renamable $agpr0
- ; PEI-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6422538 /* regdef:VReg_128_Align2 */, def renamable $vgpr0_vgpr1_vgpr2_vgpr3
+ ; PEI-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6094858 /* regdef:VReg_128_Align2 */, def renamable $vgpr0_vgpr1_vgpr2_vgpr3
; PEI-GFX90A-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = COPY killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, implicit $exec
; PEI-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3407882 /* regdef:VReg_64_Align2 */, def renamable $vgpr0_vgpr1
; PEI-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, 0, 4, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.0, addrspace 5)
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-phys-copy.mir b/llvm/test/CodeGen/AMDGPU/sgpr-phys-copy.mir
index 8398864c67030..2d856ac4b171f 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-phys-copy.mir
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-phys-copy.mir
@@ -57,63 +57,22 @@ body: |
; GFX9-LABEL: name: sgpr96_aligned_src_dst
; GFX9: liveins: $sgpr0_sgpr1_sgpr2
; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: $sgpr8 = S_MOV_B32 $sgpr2, implicit $sgpr0_sgpr1_sgpr2, implicit-def $sgpr6_sgpr7_sgpr8
- ; GFX9-NEXT: $sgpr6_sgpr7 = S_MOV_B64 $sgpr0_sgpr1, implicit $sgpr0_sgpr1_sgpr2
- $sgpr6_sgpr7_sgpr8 = COPY $sgpr0_sgpr1_sgpr2
-...
-
----
-name: sgpr96_aligned_src
-body: |
- bb.0:
- liveins: $sgpr0_sgpr1_sgpr2
- ; GFX9-LABEL: name: sgpr96_aligned_src
- ; GFX9: liveins: $sgpr0_sgpr1_sgpr2
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: $sgpr5 = S_MOV_B32 $sgpr2, implicit $sgpr0_sgpr1_sgpr2, implicit-def $sgpr3_sgpr4_sgpr5
- ; GFX9-NEXT: $sgpr4 = S_MOV_B32 $sgpr1, implicit $sgpr0_sgpr1_sgpr2
- ; GFX9-NEXT: $sgpr3 = S_MOV_B32 $sgpr0, implicit $sgpr0_sgpr1_sgpr2
- $sgpr3_sgpr4_sgpr5 = COPY $sgpr0_sgpr1_sgpr2
-...
-
----
-name: sgpr96_aligned_dst
-body: |
- bb.0:
- liveins: $sgpr3_sgpr4_sgpr5
- ; GFX9-LABEL: name: sgpr96_aligned_dst
- ; GFX9: liveins: $sgpr3_sgpr4_sgpr5
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: $sgpr0 = S_MOV_B32 $sgpr3, implicit $sgpr3_sgpr4_sgpr5, implicit-def $sgpr0_sgpr1_sgpr2
- ; GFX9-NEXT: $sgpr1 = S_MOV_B32 $sgpr4, implicit $sgpr3_sgpr4_sgpr5
- ; GFX9-NEXT: $sgpr2 = S_MOV_B32 $sgpr5, implicit $sgpr3_sgpr4_sgpr5
- $sgpr0_sgpr1_sgpr2 = COPY $sgpr3_sgpr4_sgpr5
-...
-
----
-name: sgpr96_unaligned_src_dst
-body: |
- bb.0:
- liveins: $sgpr3_sgpr4_sgpr5
- ; GFX9-LABEL: name: sgpr96_unaligned_src_dst
- ; GFX9: liveins: $sgpr3_sgpr4_sgpr5
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: $sgpr10_sgpr11 = S_MOV_B64 $sgpr4_sgpr5, implicit $sgpr3_sgpr4_sgpr5, implicit-def $sgpr9_sgpr10_sgpr11
- ; GFX9-NEXT: $sgpr9 = S_MOV_B32 $sgpr3, implicit $sgpr3_sgpr4_sgpr5
- $sgpr9_sgpr10_sgpr11 = COPY $sgpr3_sgpr4_sgpr5
+ ; GFX9-NEXT: $sgpr6 = S_MOV_B32 $sgpr2, implicit $sgpr0_sgpr1_sgpr2, implicit-def $sgpr4_sgpr5_sgpr6
+ ; GFX9-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $sgpr0_sgpr1, implicit $sgpr0_sgpr1_sgpr2
+ $sgpr4_sgpr5_sgpr6 = COPY $sgpr0_sgpr1_sgpr2
...
---
name: sgpr96_killed
body: |
bb.0:
- liveins: $sgpr3_sgpr4_sgpr5
+ liveins: $sgpr4_sgpr5_sgpr6
; GFX9-LABEL: name: sgpr96_killed
- ; GFX9: liveins: $sgpr3_sgpr4_sgpr5
+ ; GFX9: liveins: $sgpr4_sgpr5_sgpr6
; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: $sgpr10_sgpr11 = S_MOV_B64 $sgpr4_sgpr5, implicit $sgpr3_sgpr4_sgpr5, implicit-def $sgpr9_sgpr10_sgpr11
- ; GFX9-NEXT: $sgpr9 = S_MOV_B32 $sgpr3, implicit killed $sgpr3_sgpr4_sgpr5
- $sgpr9_sgpr10_sgpr11 = COPY killed $sgpr3_sgpr4_sgpr5
+ ; GFX9-NEXT: $sgpr10 = S_MOV_B32 $sgpr6, implicit $sgpr4_sgpr5_sgpr6, implicit-def $sgpr8_sgpr9_sgpr10
+ ; GFX9-NEXT: $sgpr8_sgpr9 = S_MOV_B64 $sgpr4_sgpr5, implicit killed $sgpr4_sgpr5_sgpr6
+ $sgpr8_sgpr9_sgpr10 = COPY killed $sgpr4_sgpr5_sgpr6
...
---
diff --git a/llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll b/llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll
index ac485d388a5e7..2f43cc022afd3 100644
--- a/llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll
+++ b/llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll
@@ -218,7 +218,7 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x
; CHECK-NEXT: [[S_LOAD_DWORD_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %488:sreg_64, 0, 0 :: (invariant load (s32) from `ptr addrspace(4) undef`, addrspace 4)
; CHECK-NEXT: KILL %411.sub0, %411.sub1
; CHECK-NEXT: KILL undef %488:sreg_64
- ; CHECK-NEXT: KILL [[COPY15]].sub0_sub1, [[COPY15]].sub2_sub3
+ ; CHECK-NEXT: KILL [[COPY15]].sub0_sub1_sub2, [[COPY15]].sub3
; CHECK-NEXT: [[S_LSHL_B32_8:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY12]], 3, implicit-def dead $scc
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM24:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %425, 0, 0 :: (invariant load (s128) from %ir.261, addrspace 4)
; CHECK-NEXT: [[S_ASHR_I32_8:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_8]], 31, implicit-def dead $scc
More information about the llvm-commits mailing list