[llvm] cd2594e - [GlobalISel] Improve legalization of narrow CTTZ

Jay Foad via llvm-commits llvm-commits at lists.llvm.org
Fri Aug 6 01:42:37 PDT 2021


Author: Jay Foad
Date: 2021-08-06T09:40:48+01:00
New Revision: cd2594e1c67836bfeb8dd416ac32c77abba3f290

URL: https://github.com/llvm/llvm-project/commit/cd2594e1c67836bfeb8dd416ac32c77abba3f290
DIFF: https://github.com/llvm/llvm-project/commit/cd2594e1c67836bfeb8dd416ac32c77abba3f290.diff

LOG: [GlobalISel] Improve legalization of narrow CTTZ

Differential Revision: https://reviews.llvm.org/D107457

Added: 
    

Modified: 
    llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir
    llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp

Removed: 
    


################################################################################
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 51d7bc6b61f69..f90b7df72d23b 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -2054,7 +2054,8 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
     // First ZEXT the input.
     auto MIBSrc = MIRBuilder.buildZExt(WideTy, SrcReg);
     LLT CurTy = MRI.getType(SrcReg);
-    if (MI.getOpcode() == TargetOpcode::G_CTTZ) {
+    unsigned NewOpc = MI.getOpcode();
+    if (NewOpc == TargetOpcode::G_CTTZ) {
       // The count is the same in the larger type except if the original
       // value was zero.  This can be handled by setting the bit just off
       // the top of the original type.
@@ -2062,10 +2063,12 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
           APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
       MIBSrc = MIRBuilder.buildOr(
         WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
+      // Now we know the operand is non-zero, use the more relaxed opcode.
+      NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF;
     }
 
     // Perform the operation at the larger size.
-    auto MIBNewOp = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, {MIBSrc});
+    auto MIBNewOp = MIRBuilder.buildInstr(NewOpc, {WideTy}, {MIBSrc});
     // This is already the correct result for CTPOP and CTTZs
     if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
         MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir
index 88d05f977e318..183e0811e9e6e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir
@@ -96,11 +96,7 @@ body: |
     ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65536
     ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[C1]]
     ; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[OR]](s32)
-    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[OR]](s32), [[C2]]
-    ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
-    ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C3]], [[CTTZ_ZERO_UNDEF]]
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SELECT]](s32)
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[CTTZ_ZERO_UNDEF]](s32)
     ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
     ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]]
     ; CHECK: $vgpr0 = COPY [[AND1]](s32)
@@ -176,18 +172,12 @@ body: |
     ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65536
     ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[C2]]
     ; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[OR]](s32)
-    ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[OR]](s32), [[C3]]
-    ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
-    ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C4]], [[CTTZ_ZERO_UNDEF]]
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SELECT]](s32)
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[CTTZ_ZERO_UNDEF]](s32)
     ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
     ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
     ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[C2]]
     ; CHECK: [[CTTZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[OR1]](s32)
-    ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[OR1]](s32), [[C3]]
-    ; CHECK: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C4]], [[CTTZ_ZERO_UNDEF1]]
-    ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT1]](s32)
+    ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[CTTZ_ZERO_UNDEF1]](s32)
     ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
     ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]]
     ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[COPY4]](s32)
@@ -216,11 +206,7 @@ body: |
     ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 128
     ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[C1]]
     ; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[OR]](s32)
-    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[OR]](s32), [[C2]]
-    ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
-    ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C3]], [[CTTZ_ZERO_UNDEF]]
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SELECT]](s32)
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[CTTZ_ZERO_UNDEF]](s32)
     ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
     ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]]
     ; CHECK: $vgpr0 = COPY [[AND1]](s32)
@@ -246,14 +232,10 @@ body: |
     ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934592
     ; CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[AND]], [[C1]]
     ; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[OR]](s64)
-    ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-    ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[OR]](s64), [[C2]]
-    ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 64
-    ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C3]], [[CTTZ_ZERO_UNDEF]]
-    ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[SELECT]](s32)
-    ; CHECK: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295
+    ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[CTTZ_ZERO_UNDEF]](s32)
+    ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295
     ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY [[ZEXT]](s64)
-    ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY [[C4]](s64)
+    ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY [[C2]](s64)
     ; CHECK: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[COPY3]]
     ; CHECK: [[COPY4:%[0-9]+]]:_(s64) = COPY [[AND1]](s64)
     ; CHECK: $vgpr0_vgpr1 = COPY [[COPY4]](s64)

diff --git a/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp b/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp
index b5f6e64b24512..ded20d37f62f8 100644
--- a/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp
+++ b/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp
@@ -714,7 +714,7 @@ TEST_F(AArch64GISelMITest, WidenBitCountingCTTZ) {
   CHECK: [[Zext:%[0-9]+]]:_(s16) = G_ZEXT [[Trunc]]
   CHECK: [[Cst:%[0-9]+]]:_(s16) = G_CONSTANT i16 256
   CHECK: [[Or:%[0-9]+]]:_(s16) = G_OR [[Zext]]:_, [[Cst]]
-  CHECK: [[Cttz:%[0-9]+]]:_(s16) = G_CTTZ [[Or]]
+  CHECK: [[Cttz:%[0-9]+]]:_(s16) = G_CTTZ_ZERO_UNDEF [[Or]]
   CHECK: [[Trunc:%[0-9]+]]:_(s8) = G_TRUNC [[Cttz]]
   )";
 


        


More information about the llvm-commits mailing list