[llvm] cc65e08 - [GlobalISel][Legalizer] Use ArtifactValueFinder first for unmerge combines before trying others.
Amara Emerson via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 21 00:02:21 PDT 2021
Author: Amara Emerson
Date: 2021-09-21T00:02:15-07:00
New Revision: cc65e08fe7e1402cd6549882879f50e5238e8178
URL: https://github.com/llvm/llvm-project/commit/cc65e08fe7e1402cd6549882879f50e5238e8178
DIFF: https://github.com/llvm/llvm-project/commit/cc65e08fe7e1402cd6549882879f50e5238e8178.diff
LOG: [GlobalISel][Legalizer] Use ArtifactValueFinder first for unmerge combines before trying others.
This is motivated by a pathological compile-time issue during unmerge combining.
We should be able to use the AVF (ArtifactValueFinder) to do the simplification. However,
this results in a lot of AMDGPU codegen changes which I'm not sure how to evaluate.
Differential Revision: https://reviews.llvm.org/D109748
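The gist of the change, condensed from the diff below into an illustrative sketch
(surrounding checks and the remaining specialized combines are elided; this is not
the full tryCombineUnmergeValues body):

  Builder.setInstrAndDebugLoc(MI);

  // Try the generic ArtifactValueFinder-based combine first, instead of only
  // reaching it from the default case of the unmerge(unmerge) legality switch.
  ArtifactValueFinder Finder(MRI, Builder, LI);
  if (Finder.tryCombineUnmergeDefs(MI, Observer, UpdatedDefs)) {
    markInstAndDefDead(MI, *SrcDef, DeadInsts, SrcDefIdx);
    return true;
  }

  // Only if that fails, fall back to the specialized unmerge(unmerge) and
  // unmerge(cast) combines.
  if (auto *SrcUnmerge = dyn_cast<GUnmerge>(SrcDef)) {
    // ... unmerge-of-unmerge handling, then tryFoldUnmergeCast, etc. ...
  }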
Added:
Modified:
llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
llvm/test/CodeGen/AArch64/GlobalISel/legalize-extract-vector-elt.mir
llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir
llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshl.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.1d.d16.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
index 80d3467915c9a..8a603de2f91db 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
@@ -821,6 +821,12 @@ class LegalizationArtifactCombiner {
Builder.setInstrAndDebugLoc(MI);
+ ArtifactValueFinder Finder(MRI, Builder, LI);
+ if (Finder.tryCombineUnmergeDefs(MI, Observer, UpdatedDefs)) {
+ markInstAndDefDead(MI, *SrcDef, DeadInsts, SrcDefIdx);
+ return true;
+ }
+
if (auto *SrcUnmerge = dyn_cast<GUnmerge>(SrcDef)) {
// %0:_(<4 x s16>) = G_FOO
// %1:_(<2 x s16>), %2:_(<2 x s16>) = G_UNMERGE_VALUES %0
@@ -844,15 +850,9 @@ class LegalizationArtifactCombiner {
if (ActionStep.TypeIdx == 1)
return false;
break;
- default: {
- ArtifactValueFinder Finder(MRI, Builder, LI);
- if (Finder.tryCombineUnmergeDefs(MI, Observer, UpdatedDefs)) {
- markInstAndDefDead(MI, *SrcDef, DeadInsts, SrcDefIdx);
- return true;
- }
+ default:
return false;
}
- }
auto NewUnmerge = Builder.buildUnmerge(DestTy, SrcUnmergeSrc);
@@ -883,16 +883,7 @@ class LegalizationArtifactCombiner {
ConvertOp, OpTy, DestTy)) {
// We might have a chance to combine later by trying to combine
// unmerge(cast) first
- if (tryFoldUnmergeCast(MI, *SrcDef, DeadInsts, UpdatedDefs))
- return true;
-
- // Try using the value finder.
- ArtifactValueFinder Finder(MRI, Builder, LI);
- if (Finder.tryCombineUnmergeDefs(MI, Observer, UpdatedDefs)) {
- markInstAndDefDead(MI, *SrcDef, DeadInsts, SrcDefIdx);
- return true;
- }
- return false;
+ return tryFoldUnmergeCast(MI, *SrcDef, DeadInsts, UpdatedDefs);
}
const unsigned NumMergeRegs = MergeI->getNumOperands() - 1;
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-extract-vector-elt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-extract-vector-elt.mir
index f447b8e9807c3..68c9c6cbc69d6 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-extract-vector-elt.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-extract-vector-elt.mir
@@ -241,8 +241,8 @@ body: |
liveins: $x0
; CHECK-LABEL: name: test_eve_v4p0
; CHECK: [[DEF:%[0-9]+]]:_(p0) = G_IMPLICIT_DEF
- ; CHECK: %idx:_(s64) = G_CONSTANT i64 1
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p0>) = G_BUILD_VECTOR [[DEF]](p0), [[DEF]](p0)
+ ; CHECK: %idx:_(s64) = G_CONSTANT i64 1
; CHECK: %eve:_(p0) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<2 x p0>), %idx(s64)
; CHECK: $x0 = COPY %eve(p0)
; CHECK: RET_ReallyLR
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir
index 08a6c787648f3..2112497fa607c 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir
@@ -314,9 +314,9 @@ body: |
; CHECK-LABEL: name: store_32xs8
; CHECK: liveins: $x0
; CHECK: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
- ; CHECK: %ptr:_(p0) = COPY $x0
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
+ ; CHECK: %ptr:_(p0) = COPY $x0
; CHECK: G_STORE [[BUILD_VECTOR]](<16 x s8>), %ptr(p0) :: (store (<16 x s8>), align 32)
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C]](s64)
@@ -338,9 +338,9 @@ body: |
; CHECK-LABEL: name: store_16xs16
; CHECK: liveins: $x0
; CHECK: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; CHECK: %ptr:_(p0) = COPY $x0
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16)
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16)
+ ; CHECK: %ptr:_(p0) = COPY $x0
; CHECK: G_STORE [[BUILD_VECTOR]](<8 x s16>), %ptr(p0) :: (store (<8 x s16>), align 32)
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C]](s64)
@@ -362,9 +362,9 @@ body: |
; CHECK-LABEL: name: store_8xs32
; CHECK: liveins: $x0
; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK: %ptr:_(p0) = COPY $x0
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32)
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32)
+ ; CHECK: %ptr:_(p0) = COPY $x0
; CHECK: G_STORE [[BUILD_VECTOR]](<4 x s32>), %ptr(p0) :: (store (<4 x s32>), align 32)
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C]](s64)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir
index b6a78753202cd..9b5bfc13d50b6 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir
@@ -254,9 +254,9 @@ body: |
; CHECK: [[EVEC1:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR1]](<2 x s64>), [[C1]](s64)
; CHECK: [[EVEC2:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR4]](<2 x s64>), [[C]](s64)
; CHECK: [[EVEC3:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR2]](<2 x s64>), [[C1]](s64)
- ; CHECK: [[SHUF:%[0-9]+]]:_(<2 x s64>) = G_SHUFFLE_VECTOR [[BUILD_VECTOR3]](<2 x s64>), [[BUILD_VECTOR5]], shufflemask(1, 3)
; CHECK: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[EVEC]](s64), [[EVEC1]](s64)
; CHECK: [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[EVEC2]](s64), [[EVEC3]](s64)
+ ; CHECK: [[SHUF:%[0-9]+]]:_(<2 x s64>) = G_SHUFFLE_VECTOR [[BUILD_VECTOR3]](<2 x s64>), [[BUILD_VECTOR5]], shufflemask(1, 3)
; CHECK: G_STORE [[BUILD_VECTOR6]](<2 x s64>), [[COPY8]](p0) :: (store (<2 x s64>), align 64)
; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY8]], [[C2]](s64)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshl.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshl.mir
index 467737e029486..d4ec463bd34f4 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshl.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshl.mir
@@ -846,19 +846,16 @@ body: |
; GFX9: [[LSHR6:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[LSHR5]], [[AND3]](<2 x s16>)
; GFX9: [[OR1:%[0-9]+]]:_(<2 x s16>) = G_OR [[SHL1]], [[LSHR6]]
; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[OR]](<2 x s16>)
- ; GFX9: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
- ; GFX9: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[OR1]](<2 x s16>)
+ ; GFX9: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[OR1]](<2 x s16>)
; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; GFX9: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32)
- ; GFX9: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9: [[BUILD_VECTOR_TRUNC12:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST6]](s32), [[LSHR7]](s32)
- ; GFX9: [[BUILD_VECTOR_TRUNC13:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST7]](s32), [[BITCAST8]](s32)
- ; GFX9: [[BUILD_VECTOR_TRUNC14:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR8]](s32), [[BITCAST9]](s32)
- ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC12]](<2 x s16>)
- ; GFX9: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC13]](<2 x s16>)
- ; GFX9: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC14]](<2 x s16>)
+ ; GFX9: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; GFX9: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32)
+ ; GFX9: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; GFX9: [[BUILD_VECTOR_TRUNC12:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST6]](s32), [[BITCAST7]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC13:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR7]](s32), [[BITCAST8]](s32)
+ ; GFX9: $vgpr0 = COPY [[OR]](<2 x s16>)
+ ; GFX9: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC12]](<2 x s16>)
+ ; GFX9: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC13]](<2 x s16>)
%0:_(<2 x s16>) = COPY $vgpr0
%1:_(<2 x s16>) = COPY $vgpr1
%2:_(<2 x s16>) = COPY $vgpr2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir
index 5d31438ea1eb2..b2a3886c7dd86 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir
@@ -837,31 +837,24 @@ body: |
; SI: [[OR13:%[0-9]+]]:_(s32) = G_OR [[ZEXT18]], [[SHL17]]
; SI: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR13]](s32)
; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI: [[BITCAST14:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST10]](<2 x s16>)
- ; SI: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST14]], [[C]](s32)
- ; SI: [[BITCAST15:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST13]](<2 x s16>)
+ ; SI: [[BITCAST14:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST13]](<2 x s16>)
; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; SI: [[BITCAST16:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; SI: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST16]], [[C]](s32)
- ; SI: [[BITCAST17:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; SI: [[BITCAST15:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; SI: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST15]], [[C]](s32)
+ ; SI: [[BITCAST16:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
; SI: [[AND34:%[0-9]+]]:_(s32) = G_AND [[BITCAST14]], [[C1]]
- ; SI: [[AND35:%[0-9]+]]:_(s32) = G_AND [[LSHR21]], [[C1]]
+ ; SI: [[AND35:%[0-9]+]]:_(s32) = G_AND [[BITCAST15]], [[C1]]
; SI: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND35]], [[C]](s32)
; SI: [[OR14:%[0-9]+]]:_(s32) = G_OR [[AND34]], [[SHL18]]
- ; SI: [[BITCAST18:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR14]](s32)
- ; SI: [[AND36:%[0-9]+]]:_(s32) = G_AND [[BITCAST15]], [[C1]]
+ ; SI: [[BITCAST17:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR14]](s32)
+ ; SI: [[AND36:%[0-9]+]]:_(s32) = G_AND [[LSHR21]], [[C1]]
; SI: [[AND37:%[0-9]+]]:_(s32) = G_AND [[BITCAST16]], [[C1]]
; SI: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND37]], [[C]](s32)
; SI: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND36]], [[SHL19]]
- ; SI: [[BITCAST19:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR15]](s32)
- ; SI: [[AND38:%[0-9]+]]:_(s32) = G_AND [[LSHR22]], [[C1]]
- ; SI: [[AND39:%[0-9]+]]:_(s32) = G_AND [[BITCAST17]], [[C1]]
- ; SI: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[AND39]], [[C]](s32)
- ; SI: [[OR16:%[0-9]+]]:_(s32) = G_OR [[AND38]], [[SHL20]]
- ; SI: [[BITCAST20:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR16]](s32)
- ; SI: $vgpr0 = COPY [[BITCAST18]](<2 x s16>)
- ; SI: $vgpr1 = COPY [[BITCAST19]](<2 x s16>)
- ; SI: $vgpr2 = COPY [[BITCAST20]](<2 x s16>)
+ ; SI: [[BITCAST18:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR15]](s32)
+ ; SI: $vgpr0 = COPY [[BITCAST10]](<2 x s16>)
+ ; SI: $vgpr1 = COPY [[BITCAST17]](<2 x s16>)
+ ; SI: $vgpr2 = COPY [[BITCAST18]](<2 x s16>)
; VI-LABEL: name: test_fshr_v3s16_v3s16
; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; VI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
@@ -990,31 +983,24 @@ body: |
; VI: [[OR13:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL17]]
; VI: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR13]](s32)
; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI: [[BITCAST14:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST10]](<2 x s16>)
- ; VI: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST14]], [[C]](s32)
- ; VI: [[BITCAST15:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST13]](<2 x s16>)
+ ; VI: [[BITCAST14:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST13]](<2 x s16>)
; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; VI: [[BITCAST16:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; VI: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST16]], [[C]](s32)
- ; VI: [[BITCAST17:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; VI: [[BITCAST15:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; VI: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST15]], [[C]](s32)
+ ; VI: [[BITCAST16:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
; VI: [[AND19:%[0-9]+]]:_(s32) = G_AND [[BITCAST14]], [[C1]]
- ; VI: [[AND20:%[0-9]+]]:_(s32) = G_AND [[LSHR21]], [[C1]]
+ ; VI: [[AND20:%[0-9]+]]:_(s32) = G_AND [[BITCAST15]], [[C1]]
; VI: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND20]], [[C]](s32)
; VI: [[OR14:%[0-9]+]]:_(s32) = G_OR [[AND19]], [[SHL18]]
- ; VI: [[BITCAST18:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR14]](s32)
- ; VI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[BITCAST15]], [[C1]]
+ ; VI: [[BITCAST17:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR14]](s32)
+ ; VI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LSHR21]], [[C1]]
; VI: [[AND22:%[0-9]+]]:_(s32) = G_AND [[BITCAST16]], [[C1]]
; VI: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C]](s32)
; VI: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND21]], [[SHL19]]
- ; VI: [[BITCAST19:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR15]](s32)
- ; VI: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LSHR22]], [[C1]]
- ; VI: [[AND24:%[0-9]+]]:_(s32) = G_AND [[BITCAST17]], [[C1]]
- ; VI: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[AND24]], [[C]](s32)
- ; VI: [[OR16:%[0-9]+]]:_(s32) = G_OR [[AND23]], [[SHL20]]
- ; VI: [[BITCAST20:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR16]](s32)
- ; VI: $vgpr0 = COPY [[BITCAST18]](<2 x s16>)
- ; VI: $vgpr1 = COPY [[BITCAST19]](<2 x s16>)
- ; VI: $vgpr2 = COPY [[BITCAST20]](<2 x s16>)
+ ; VI: [[BITCAST18:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR15]](s32)
+ ; VI: $vgpr0 = COPY [[BITCAST10]](<2 x s16>)
+ ; VI: $vgpr1 = COPY [[BITCAST17]](<2 x s16>)
+ ; VI: $vgpr2 = COPY [[BITCAST18]](<2 x s16>)
; GFX9-LABEL: name: test_fshr_v3s16_v3s16
; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
@@ -1074,19 +1060,16 @@ body: |
; GFX9: [[LSHR4:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[BUILD_VECTOR_TRUNC3]], [[AND2]](<2 x s16>)
; GFX9: [[OR1:%[0-9]+]]:_(<2 x s16>) = G_OR [[SHL3]], [[LSHR4]]
; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[OR]](<2 x s16>)
- ; GFX9: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
- ; GFX9: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[OR1]](<2 x s16>)
+ ; GFX9: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[OR1]](<2 x s16>)
; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; GFX9: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32)
- ; GFX9: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9: [[BUILD_VECTOR_TRUNC12:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST6]](s32), [[LSHR5]](s32)
- ; GFX9: [[BUILD_VECTOR_TRUNC13:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST7]](s32), [[BITCAST8]](s32)
- ; GFX9: [[BUILD_VECTOR_TRUNC14:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR6]](s32), [[BITCAST9]](s32)
- ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC12]](<2 x s16>)
- ; GFX9: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC13]](<2 x s16>)
- ; GFX9: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC14]](<2 x s16>)
+ ; GFX9: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; GFX9: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32)
+ ; GFX9: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; GFX9: [[BUILD_VECTOR_TRUNC12:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST6]](s32), [[BITCAST7]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC13:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR5]](s32), [[BITCAST8]](s32)
+ ; GFX9: $vgpr0 = COPY [[OR]](<2 x s16>)
+ ; GFX9: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC12]](<2 x s16>)
+ ; GFX9: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC13]](<2 x s16>)
%0:_(<2 x s16>) = COPY $vgpr0
%1:_(<2 x s16>) = COPY $vgpr1
%2:_(<2 x s16>) = COPY $vgpr2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir
index 77110837cadee..800be96c92516 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir
@@ -711,7 +711,6 @@ body: |
; CHECK: [[LOAD65:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD63]](p5) :: (load (s32) from unknown-address + 244, addrspace 5)
; CHECK: [[LOAD66:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD64]](p5) :: (load (s32) from unknown-address + 248, align 8, addrspace 5)
; CHECK: [[LOAD67:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD65]](p5) :: (load (s32) from unknown-address + 252, addrspace 5)
- ; CHECK: [[COPY65:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32)
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32)
; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32)
@@ -728,6 +727,7 @@ body: |
; CHECK: [[BUILD_VECTOR13:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD56]](s32), [[LOAD57]](s32), [[LOAD58]](s32), [[LOAD59]](s32)
; CHECK: [[BUILD_VECTOR14:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD60]](s32), [[LOAD61]](s32), [[LOAD62]](s32), [[LOAD63]](s32)
; CHECK: [[BUILD_VECTOR15:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD64]](s32), [[LOAD65]](s32), [[LOAD66]](s32), [[LOAD67]](s32)
+ ; CHECK: [[COPY65:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CHECK: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY65]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
; CHECK: [[C68:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CHECK: [[PTR_ADD67:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C68]](s64)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll
index 045cc8da81a30..c0bfde9e89a5b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll
@@ -148,25 +148,18 @@ define amdgpu_ps <3 x half> @image_load_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32
; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8)
; PACKED: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s16>)
; PACKED: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; PACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; PACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; PACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
; PACKED: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; PACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; PACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; PACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
- ; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+ ; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
; PACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
; PACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; PACKED: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; PACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
- ; PACKED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
- ; PACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
- ; PACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; PACKED: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; PACKED: $vgpr0 = COPY [[BITCAST3]](<2 x s16>)
- ; PACKED: $vgpr1 = COPY [[BITCAST4]](<2 x s16>)
+ ; PACKED: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; PACKED: $vgpr0 = COPY [[UV]](<2 x s16>)
+ ; PACKED: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
%tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
ret <3 x half> %tex
@@ -397,25 +390,18 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16(<8 x i32> inreg %rsrc, i32 %s,
; PACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV1]](s32)
; PACKED: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
; PACKED: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; PACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST]](<2 x s16>)
- ; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; PACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; PACKED: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST1]](<2 x s16>)
+ ; PACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST1]](<2 x s16>)
; PACKED: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; PACKED: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; PACKED: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; PACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; PACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
- ; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+ ; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
; PACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
; PACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; PACKED: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; PACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
- ; PACKED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
- ; PACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
- ; PACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; PACKED: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; PACKED: $vgpr0 = COPY [[BITCAST5]](<2 x s16>)
- ; PACKED: $vgpr1 = COPY [[BITCAST6]](<2 x s16>)
+ ; PACKED: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; PACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ; PACKED: $vgpr1 = COPY [[BITCAST4]](<2 x s16>)
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
%res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
%tex = extractvalue { <3 x half>, i32 } %res, 0
@@ -634,25 +620,18 @@ define amdgpu_ps <3 x half> @image_load_v3f16_dmask_1100(<8 x i32> inreg %rsrc,
; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource")
; PACKED: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
; PACKED: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; PACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>)
- ; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; PACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[DEF]](<2 x s16>)
+ ; PACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[DEF]](<2 x s16>)
; PACKED: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; PACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; PACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; PACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
- ; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+ ; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
; PACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
; PACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; PACKED: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; PACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
- ; PACKED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
- ; PACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
- ; PACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; PACKED: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; PACKED: $vgpr0 = COPY [[BITCAST3]](<2 x s16>)
- ; PACKED: $vgpr1 = COPY [[BITCAST4]](<2 x s16>)
+ ; PACKED: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; PACKED: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>)
+ ; PACKED: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
%tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
ret <3 x half> %tex
@@ -710,25 +689,18 @@ define amdgpu_ps <3 x half> @image_load_v3f16_dmask_1000(<8 x i32> inreg %rsrc,
; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource")
; PACKED: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
; PACKED: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; PACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>)
- ; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; PACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[DEF]](<2 x s16>)
+ ; PACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[DEF]](<2 x s16>)
; PACKED: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; PACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; PACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; PACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
- ; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+ ; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
; PACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
; PACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; PACKED: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; PACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
- ; PACKED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
- ; PACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
- ; PACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; PACKED: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; PACKED: $vgpr0 = COPY [[BITCAST3]](<2 x s16>)
- ; PACKED: $vgpr1 = COPY [[BITCAST4]](<2 x s16>)
+ ; PACKED: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; PACKED: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>)
+ ; PACKED: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
%tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
ret <3 x half> %tex
@@ -1212,25 +1184,18 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_1100(<8 x i32> inreg %rs
; PACKED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
; PACKED: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST]](<2 x s16>)
- ; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; PACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
- ; PACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[DEF1]](<2 x s16>)
+ ; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[DEF1]](<2 x s16>)
; PACKED: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
- ; PACKED: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; PACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; PACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; PACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
- ; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+ ; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
; PACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
; PACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; PACKED: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; PACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
- ; PACKED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
- ; PACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
- ; PACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; PACKED: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; PACKED: $vgpr0 = COPY [[BITCAST4]](<2 x s16>)
- ; PACKED: $vgpr1 = COPY [[BITCAST5]](<2 x s16>)
+ ; PACKED: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; PACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ; PACKED: $vgpr1 = COPY [[BITCAST3]](<2 x s16>)
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
%res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
%tex = extractvalue { <3 x half>, i32 } %res, 0
@@ -1298,25 +1263,18 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_1000(<8 x i32> inreg %rs
; PACKED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
; PACKED: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST]](<2 x s16>)
- ; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; PACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
- ; PACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[DEF1]](<2 x s16>)
+ ; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[DEF1]](<2 x s16>)
; PACKED: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
- ; PACKED: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; PACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; PACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; PACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
- ; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+ ; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
; PACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
; PACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; PACKED: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; PACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
- ; PACKED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
- ; PACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
- ; PACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; PACKED: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; PACKED: $vgpr0 = COPY [[BITCAST4]](<2 x s16>)
- ; PACKED: $vgpr1 = COPY [[BITCAST5]](<2 x s16>)
+ ; PACKED: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; PACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ; PACKED: $vgpr1 = COPY [[BITCAST3]](<2 x s16>)
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
%res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
%tex = extractvalue { <3 x half>, i32 } %res, 0
@@ -1384,25 +1342,18 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_0000(<8 x i32> inreg %rs
; PACKED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
; PACKED: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST]](<2 x s16>)
- ; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; PACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
- ; PACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[DEF1]](<2 x s16>)
+ ; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[DEF1]](<2 x s16>)
; PACKED: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
- ; PACKED: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; PACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; PACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; PACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
- ; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+ ; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
; PACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
; PACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; PACKED: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; PACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
- ; PACKED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
- ; PACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
- ; PACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; PACKED: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; PACKED: $vgpr0 = COPY [[BITCAST4]](<2 x s16>)
- ; PACKED: $vgpr1 = COPY [[BITCAST5]](<2 x s16>)
+ ; PACKED: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; PACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ; PACKED: $vgpr1 = COPY [[BITCAST3]](<2 x s16>)
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
%res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
%tex = extractvalue { <3 x half>, i32 } %res, 0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.1d.d16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.1d.d16.ll
index 99ab3580b91df..47c4ac2aa0428 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.1d.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.1d.d16.ll
@@ -535,10 +535,7 @@ define amdgpu_ps <3 x half> @load_1d_v3f16_xyz(<8 x i32> inreg %rsrc, i32 %s) {
; GFX8-PACKED-NEXT: s_lshl_b32 s0, s0, 16
; GFX8-PACKED-NEXT: v_mov_b32_e32 v2, s0
; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0)
-; GFX8-PACKED-NEXT: v_lshrrev_b32_e32 v3, 16, v0
; GFX8-PACKED-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-PACKED-NEXT: v_lshlrev_b32_e32 v2, 16, v3
-; GFX8-PACKED-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-PACKED-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: load_1d_v3f16_xyz:
@@ -555,10 +552,7 @@ define amdgpu_ps <3 x half> @load_1d_v3f16_xyz(<8 x i32> inreg %rsrc, i32 %s) {
; GFX9-NEXT: v_mov_b32_e32 v2, 0xffff
; GFX9-NEXT: s_lshl_b32 s0, s0, 16
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v0
-; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX9-NEXT: v_and_or_b32 v1, v1, v2, s0
-; GFX9-NEXT: v_and_or_b32 v0, v0, v2, v3
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: load_1d_v3f16_xyz:
@@ -571,15 +565,11 @@ define amdgpu_ps <3 x half> @load_1d_v3f16_xyz(<8 x i32> inreg %rsrc, i32 %s) {
; GFX10-NEXT: s_mov_b32 s5, s7
; GFX10-NEXT: s_mov_b32 s6, s8
; GFX10-NEXT: s_mov_b32 s7, s9
-; GFX10-NEXT: v_mov_b32_e32 v3, 0xffff
; GFX10-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm d16
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_lshl_b32 s0, s0, 16
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: v_lshrrev_b32_e32 v2, 16, v0
-; GFX10-NEXT: v_and_or_b32 v1, v1, v3, s0
-; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v2
-; GFX10-NEXT: v_and_or_b32 v0, v0, v3, v2
+; GFX10-NEXT: v_and_or_b32 v1, 0xffff, v1, s0
; GFX10-NEXT: ; return to shader part epilog
%v = call <3 x half> @llvm.amdgcn.image.load.1d.v3f16.i32(i32 7, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
ret <3 x half> %v
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll
index f65d72a560dd3..a03df7c7cb7b7 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll
@@ -140,9 +140,9 @@ define amdgpu_ps i64 @scalar_xnor_v4i16_one_use(<4 x i16> inreg %a, <4 x i16> in
; GFX8: ; %bb.0:
; GFX8-NEXT: s_mov_b32 s4, 0xffff
; GFX8-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
+; GFX8-NEXT: s_mov_b32 s5, s4
; GFX8-NEXT: s_lshr_b32 s3, s0, 16
; GFX8-NEXT: s_and_b32 s2, s0, s4
-; GFX8-NEXT: s_mov_b32 s5, s4
; GFX8-NEXT: s_lshr_b32 s7, s1, 16
; GFX8-NEXT: s_and_b32 s6, s1, s4
; GFX8-NEXT: s_xor_b64 s[0:1], s[2:3], s[4:5]