[PATCH] D107457: [GlobalISel] Improve legalization of narrow CTTZ

Jay Foad via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 4 06:43:09 PDT 2021


foad created this revision.
foad added reviewers: aditya_nandakumar, arsenm.
Herald added subscribers: kerbowa, hiraditya, rovka, nhaehnle, jvesely.
foad requested review of this revision.
Herald added subscribers: llvm-commits, wdng.
Herald added a project: LLVM.

Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D107457

Files:
  llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
  llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir


Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir
@@ -96,11 +96,7 @@
     ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65536
     ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[C1]]
     ; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[OR]](s32)
-    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[OR]](s32), [[C2]]
-    ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
-    ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C3]], [[CTTZ_ZERO_UNDEF]]
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SELECT]](s32)
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[CTTZ_ZERO_UNDEF]](s32)
     ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
     ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]]
     ; CHECK: $vgpr0 = COPY [[AND1]](s32)
@@ -176,18 +172,12 @@
     ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65536
     ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[C2]]
     ; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[OR]](s32)
-    ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[OR]](s32), [[C3]]
-    ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
-    ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C4]], [[CTTZ_ZERO_UNDEF]]
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SELECT]](s32)
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[CTTZ_ZERO_UNDEF]](s32)
     ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
     ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
     ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[C2]]
     ; CHECK: [[CTTZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[OR1]](s32)
-    ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[OR1]](s32), [[C3]]
-    ; CHECK: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C4]], [[CTTZ_ZERO_UNDEF1]]
-    ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT1]](s32)
+    ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[CTTZ_ZERO_UNDEF1]](s32)
     ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
     ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]]
     ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[COPY4]](s32)
@@ -216,11 +206,7 @@
     ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 128
     ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[C1]]
     ; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[OR]](s32)
-    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[OR]](s32), [[C2]]
-    ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
-    ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C3]], [[CTTZ_ZERO_UNDEF]]
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SELECT]](s32)
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[CTTZ_ZERO_UNDEF]](s32)
     ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
     ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]]
     ; CHECK: $vgpr0 = COPY [[AND1]](s32)
@@ -246,14 +232,10 @@
     ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934592
     ; CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[AND]], [[C1]]
     ; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[OR]](s64)
-    ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-    ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[OR]](s64), [[C2]]
-    ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 64
-    ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C3]], [[CTTZ_ZERO_UNDEF]]
-    ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[SELECT]](s32)
-    ; CHECK: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295
+    ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[CTTZ_ZERO_UNDEF]](s32)
+    ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295
     ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY [[ZEXT]](s64)
-    ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY [[C4]](s64)
+    ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY [[C2]](s64)
     ; CHECK: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[COPY3]]
     ; CHECK: [[COPY4:%[0-9]+]]:_(s64) = COPY [[AND1]](s64)
     ; CHECK: $vgpr0_vgpr1 = COPY [[COPY4]](s64)
Index: llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
===================================================================
--- llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -2054,7 +2054,8 @@
     // First ZEXT the input.
     auto MIBSrc = MIRBuilder.buildZExt(WideTy, SrcReg);
     LLT CurTy = MRI.getType(SrcReg);
-    if (MI.getOpcode() == TargetOpcode::G_CTTZ) {
+    unsigned NewOpc = MI.getOpcode();
+    if (NewOpc == TargetOpcode::G_CTTZ) {
       // The count is the same in the larger type except if the original
       // value was zero.  This can be handled by setting the bit just off
       // the top of the original type.
@@ -2062,10 +2063,12 @@
           APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
       MIBSrc = MIRBuilder.buildOr(
         WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
+      // Now we know the operand is non-zero, use the more relaxed opcode.
+      NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF;
     }
 
     // Perform the operation at the larger size.
-    auto MIBNewOp = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, {MIBSrc});
+    auto MIBNewOp = MIRBuilder.buildInstr(NewOpc, {WideTy}, {MIBSrc});
     // This is already the correct result for CTPOP and CTTZs
     if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
         MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D107457.364086.patch
Type: text/x-patch
Size: 5787 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20210804/047097e0/attachment.bin>


More information about the llvm-commits mailing list