[llvm] bcf5184 - AMDGPU/GlobalISel: Make sure <2 x s1> phis are scalarized
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Sun Jul 26 07:16:05 PDT 2020
Author: Matt Arsenault
Date: 2020-07-26T10:04:47-04:00
New Revision: bcf5184a68d1d851895692bae6eed16a74b519db
URL: https://github.com/llvm/llvm-project/commit/bcf5184a68d1d851895692bae6eed16a74b519db
DIFF: https://github.com/llvm/llvm-project/commit/bcf5184a68d1d851895692bae6eed16a74b519db.diff
LOG: AMDGPU/GlobalISel: Make sure <2 x s1> phis are scalarized
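
Previously the phi rule chain ended with .legalIf(isPointer(0)), so a
<2 x s1> phi matched no rule at all: clampScalar and
widenScalarToNextPow2 only act on scalar types,
clampMaxNumElements(0, S32, 16) only fires for 32-bit elements, and the
small-odd-vector widening does not apply to a two-element vector. The
fix appends .scalarize(0) as a final fallback that breaks such leftover
vectors into scalar phis, and moves the pointer rule ahead of the
mutating rules so pointer phis are accepted before the fallback is
reached.

Condensed sketch of the resulting chain (assembled from the diff below;
the opcode is G_PHI going by the test, and the unchanged legalFor
clauses ahead of this excerpt are elided):

  getActionDefinitionsBuilder(G_PHI)
      // ... legalFor clauses for the plain scalar/vector cases ...
      .legalFor(AllS64Vectors)
      .legalFor(AddrSpaces64)
      .legalFor(AddrSpaces32)
      .legalIf(isPointer(0))     // moved up: pointer phis are legal as-is
      .clampScalar(0, S32, S256)
      .widenScalarToNextPow2(0, 32)
      .clampMaxNumElements(0, S32, 16)
      .moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
      .scalarize(0);             // new fallback: anything still illegal,
                                 // e.g. <2 x s1>, becomes individual s1 phis

In the new test this turns the <2 x s1> G_PHI into two s1 G_PHIs, which
the legalized G_ZEXT then any-extends, masks with 1, and rebuilds into
the <2 x s32> result.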
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index f72db8a61aab..c21414d59ba0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -415,11 +415,12 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.legalFor(AllS64Vectors)
.legalFor(AddrSpaces64)
.legalFor(AddrSpaces32)
+ .legalIf(isPointer(0))
.clampScalar(0, S32, S256)
.widenScalarToNextPow2(0, 32)
.clampMaxNumElements(0, S32, 16)
.moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
- .legalIf(isPointer(0));
+    .scalarize(0);

if (ST.hasVOP3PInsts()) {
assert(ST.hasIntClamp() && "all targets with VOP3P should support clamp");
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir
index 81408b79b11f..9a91d908bb7b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir
@@ -1547,3 +1547,94 @@ body: |
S_SETPC_B64 undef $sgpr30_sgpr31
...
+---
+name: test_phi_v2s1
+tracksRegLiveness: true
+
+body: |
+ ; CHECK-LABEL: name: test_phi_v2s1
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+ ; CHECK: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
+ ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
+ ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32)
+ ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
+ ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
+ ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+ ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]]
+ ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+ ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C2]]
+ ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND]](s32), [[AND1]]
+ ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C2]]
+ ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+ ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C2]]
+ ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND2]](s32), [[AND3]]
+ ; CHECK: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY3]](s32), [[C]]
+ ; CHECK: G_BRCOND [[ICMP2]](s1), %bb.1
+ ; CHECK: G_BR %bb.2
+ ; CHECK: bb.1:
+ ; CHECK: successors: %bb.2(0x80000000)
+ ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
+ ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C3]](s32)
+ ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>)
+ ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C3]](s32)
+ ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+ ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C4]]
+ ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+ ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C4]]
+ ; CHECK: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[AND4]](s32), [[AND5]]
+ ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+ ; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C4]]
+ ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
+ ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C4]]
+ ; CHECK: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[AND6]](s32), [[AND7]]
+ ; CHECK: G_BR %bb.2
+ ; CHECK: bb.2:
+ ; CHECK: [[PHI:%[0-9]+]]:_(s1) = G_PHI [[ICMP]](s1), %bb.0, [[ICMP3]](s1), %bb.1
+ ; CHECK: [[PHI1:%[0-9]+]]:_(s1) = G_PHI [[ICMP1]](s1), %bb.0, [[ICMP4]](s1), %bb.1
+ ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI]](s1)
+ ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI1]](s1)
+ ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY [[ANYEXT]](s32)
+ ; CHECK: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C5]]
+ ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY [[ANYEXT1]](s32)
+ ; CHECK: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C5]]
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND8]](s32), [[AND9]](s32)
+ ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31
+ bb.0:
+ successors: %bb.1, %bb.2
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+
+ %0:_(<2 x s16>) = COPY $vgpr0
+ %1:_(<2 x s16>) = COPY $vgpr1
+ %2:_(<2 x s16>) = COPY $vgpr2
+ %3:_(s32) = COPY $vgpr1
+ %4:_(s32) = G_CONSTANT i32 0
+ %5:_(<2 x s1>) = G_ICMP intpred(eq), %0, %1
+ %6:_(s1) = G_ICMP intpred(eq), %3, %4
+ G_BRCOND %6, %bb.1
+ G_BR %bb.2
+
+ bb.1:
+ successors: %bb.2
+
+ %7:_(<2 x s1>) = G_ICMP intpred(ne), %0, %2
+ G_BR %bb.2
+
+ bb.2:
+ %8:_(<2 x s1>) = G_PHI %5, %bb.0, %7, %bb.1
+ %9:_(<2 x s32>) = G_ZEXT %8
+ $vgpr0_vgpr1 = COPY %9
+ S_SETPC_B64 undef $sgpr30_sgpr31
+...