[llvm] cd2594e - [GlobalISel] Improve legalization of narrow CTTZ
Jay Foad via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 6 01:42:37 PDT 2021
Author: Jay Foad
Date: 2021-08-06T09:40:48+01:00
New Revision: cd2594e1c67836bfeb8dd416ac32c77abba3f290
URL: https://github.com/llvm/llvm-project/commit/cd2594e1c67836bfeb8dd416ac32c77abba3f290
DIFF: https://github.com/llvm/llvm-project/commit/cd2594e1c67836bfeb8dd416ac32c77abba3f290.diff
LOG: [GlobalISel] Improve legalization of narrow CTTZ
Differential Revision: https://reviews.llvm.org/D107457
Added:
Modified:
llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir
llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 51d7bc6b61f69..f90b7df72d23b 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -2054,7 +2054,8 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
// First ZEXT the input.
auto MIBSrc = MIRBuilder.buildZExt(WideTy, SrcReg);
LLT CurTy = MRI.getType(SrcReg);
- if (MI.getOpcode() == TargetOpcode::G_CTTZ) {
+ unsigned NewOpc = MI.getOpcode();
+ if (NewOpc == TargetOpcode::G_CTTZ) {
// The count is the same in the larger type except if the original
// value was zero. This can be handled by setting the bit just off
// the top of the original type.
@@ -2062,10 +2063,12 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
MIBSrc = MIRBuilder.buildOr(
WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
+ // Now we know the operand is non-zero, use the more relaxed opcode.
+ NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF;
}
// Perform the operation at the larger size.
- auto MIBNewOp = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, {MIBSrc});
+ auto MIBNewOp = MIRBuilder.buildInstr(NewOpc, {WideTy}, {MIBSrc});
// This is already the correct result for CTPOP and CTTZs
if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir
index 88d05f977e318..183e0811e9e6e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir
@@ -96,11 +96,7 @@ body: |
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65536
; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[C1]]
; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[OR]](s32)
- ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[OR]](s32), [[C2]]
- ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
- ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C3]], [[CTTZ_ZERO_UNDEF]]
- ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SELECT]](s32)
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[CTTZ_ZERO_UNDEF]](s32)
; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]]
; CHECK: $vgpr0 = COPY [[AND1]](s32)
@@ -176,18 +172,12 @@ body: |
; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65536
; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[C2]]
; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[OR]](s32)
- ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[OR]](s32), [[C3]]
- ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
- ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C4]], [[CTTZ_ZERO_UNDEF]]
- ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SELECT]](s32)
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[CTTZ_ZERO_UNDEF]](s32)
; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[C2]]
; CHECK: [[CTTZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[OR1]](s32)
- ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[OR1]](s32), [[C3]]
- ; CHECK: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C4]], [[CTTZ_ZERO_UNDEF1]]
- ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT1]](s32)
+ ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[CTTZ_ZERO_UNDEF1]](s32)
; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]]
; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[COPY4]](s32)
@@ -216,11 +206,7 @@ body: |
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 128
; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[C1]]
; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[OR]](s32)
- ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[OR]](s32), [[C2]]
- ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
- ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C3]], [[CTTZ_ZERO_UNDEF]]
- ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SELECT]](s32)
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[CTTZ_ZERO_UNDEF]](s32)
; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]]
; CHECK: $vgpr0 = COPY [[AND1]](s32)
@@ -246,14 +232,10 @@ body: |
; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934592
; CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[AND]], [[C1]]
; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[OR]](s64)
- ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[OR]](s64), [[C2]]
- ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 64
- ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C3]], [[CTTZ_ZERO_UNDEF]]
- ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[SELECT]](s32)
- ; CHECK: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295
+ ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[CTTZ_ZERO_UNDEF]](s32)
+ ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295
; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY [[ZEXT]](s64)
- ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY [[C4]](s64)
+ ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY [[C2]](s64)
; CHECK: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[COPY3]]
; CHECK: [[COPY4:%[0-9]+]]:_(s64) = COPY [[AND1]](s64)
; CHECK: $vgpr0_vgpr1 = COPY [[COPY4]](s64)
diff --git a/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp b/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp
index b5f6e64b24512..ded20d37f62f8 100644
--- a/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp
+++ b/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp
@@ -714,7 +714,7 @@ TEST_F(AArch64GISelMITest, WidenBitCountingCTTZ) {
CHECK: [[Zext:%[0-9]+]]:_(s16) = G_ZEXT [[Trunc]]
CHECK: [[Cst:%[0-9]+]]:_(s16) = G_CONSTANT i16 256
CHECK: [[Or:%[0-9]+]]:_(s16) = G_OR [[Zext]]:_, [[Cst]]
- CHECK: [[Cttz:%[0-9]+]]:_(s16) = G_CTTZ [[Or]]
+ CHECK: [[Cttz:%[0-9]+]]:_(s16) = G_CTTZ_ZERO_UNDEF [[Or]]
CHECK: [[Trunc:%[0-9]+]]:_(s8) = G_TRUNC [[Cttz]]
)";
More information about the llvm-commits
mailing list