[llvm] 312a9d1 - GlobalISel: Fix narrowScalar for G_{CTLZ|CTTZ}_ZERO_UNDEF

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Sun Feb 9 16:21:59 PST 2020


Author: Matt Arsenault
Date: 2020-02-09T19:02:38-05:00
New Revision: 312a9d1b8343f5185ae9c6cdd2b022f1f93514e5

URL: https://github.com/llvm/llvm-project/commit/312a9d1b8343f5185ae9c6cdd2b022f1f93514e5
DIFF: https://github.com/llvm/llvm-project/commit/312a9d1b8343f5185ae9c6cdd2b022f1f93514e5.diff

LOG: GlobalISel: Fix narrowScalar for G_{CTLZ|CTTZ}_ZERO_UNDEF

Narrow these for 64-bit VALU for AMDGPU.

Added: 
    

Modified: 
    llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
    llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
    llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ctlz-zero-undef.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-cttz-zero-undef.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 6100def1dedf..5396fcfc4824 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -1028,8 +1028,10 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
     if (TypeIdx == 1)
       switch (MI.getOpcode()) {
       case TargetOpcode::G_CTLZ:
+      case TargetOpcode::G_CTLZ_ZERO_UNDEF:
         return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
       case TargetOpcode::G_CTTZ:
+      case TargetOpcode::G_CTTZ_ZERO_UNDEF:
         return narrowScalarCTTZ(MI, TypeIdx, NarrowTy);
       case TargetOpcode::G_CTPOP:
         return narrowScalarCTPOP(MI, TypeIdx, NarrowTy);
@@ -3985,13 +3987,17 @@ LegalizerHelper::narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx,
   unsigned NarrowSize = NarrowTy.getSizeInBits();
 
   if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
+    const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF;
+
     MachineIRBuilder &B = MIRBuilder;
     auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
     // ctlz(Hi:Lo) -> Hi == 0 ? (NarrowSize + ctlz(Lo)) : ctlz(Hi)
     auto C_0 = B.buildConstant(NarrowTy, 0);
     auto HiIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
                                 UnmergeSrc.getReg(1), C_0);
-    auto LoCTLZ = B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
+    auto LoCTLZ = IsUndef ?
+      B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0)) :
+      B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
     auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
     auto HiIsZeroCTLZ = B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
     auto HiCTLZ = B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1));
@@ -4017,13 +4023,17 @@ LegalizerHelper::narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx,
   unsigned NarrowSize = NarrowTy.getSizeInBits();
 
   if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
+    const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF;
+
     MachineIRBuilder &B = MIRBuilder;
     auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
     // cttz(Hi:Lo) -> Lo == 0 ? (cttz(Hi) + NarrowSize) : cttz(Lo)
     auto C_0 = B.buildConstant(NarrowTy, 0);
     auto LoIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
                                 UnmergeSrc.getReg(0), C_0);
-    auto HiCTTZ = B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
+    auto HiCTTZ = IsUndef ?
+      B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1)) :
+      B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
     auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
     auto LoIsZeroCTTZ = B.buildAdd(DstTy, HiCTTZ, C_NarrowSize);
     auto LoCTTZ = B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0));

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index d8e3269d3c6d..c444437fce80 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -2084,7 +2084,9 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
     MI.eraseFromParent();
     return;
   }
-  case AMDGPU::G_CTPOP: {
+  case AMDGPU::G_CTPOP:
+  case AMDGPU::G_CTLZ_ZERO_UNDEF:
+  case AMDGPU::G_CTTZ_ZERO_UNDEF: {
     MachineIRBuilder B(MI);
     MachineFunction &MF = B.getMF();
 
@@ -2104,7 +2106,7 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
     LegalizerHelper Helper(MF, Observer, B);
 
     if (Helper.narrowScalar(MI, 1, S32) != LegalizerHelper::Legalized)
-      llvm_unreachable("widenScalar should have succeeded");
+      llvm_unreachable("narrowScalar should have succeeded");
     return;
   }
   case AMDGPU::G_SEXT:
@@ -3204,9 +3206,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
     OpdsMapping[0] = OpdsMapping[1] = AMDGPU::getValueMapping(BankID, Size);
     break;
   }
-  case AMDGPU::G_CTLZ:
   case AMDGPU::G_CTLZ_ZERO_UNDEF:
-  case AMDGPU::G_CTTZ:
   case AMDGPU::G_CTTZ_ZERO_UNDEF:
   case AMDGPU::G_CTPOP: {
     unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ctlz-zero-undef.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ctlz-zero-undef.mir
index 19b64b93499b..991da6c35e61 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ctlz-zero-undef.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ctlz-zero-undef.mir
@@ -3,29 +3,72 @@
 # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
 
 ---
-name: ctlz_zero_undef_i32_s
+name: ctlz_zero_undef_s32_s
 legalized: true
 
 body: |
   bb.0:
     liveins: $sgpr0
-    ; CHECK-LABEL: name: ctlz_zero_undef_i32_s
+    ; CHECK-LABEL: name: ctlz_zero_undef_s32_s
     ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
-    ; CHECK: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:sgpr(s32) = G_CTLZ_ZERO_UNDEF [[COPY]]
+    ; CHECK: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:sgpr(s32) = G_CTLZ_ZERO_UNDEF [[COPY]](s32)
+    ; CHECK: S_ENDPGM 0, implicit [[CTLZ_ZERO_UNDEF]](s32)
     %0:_(s32) = COPY $sgpr0
     %1:_(s32) = G_CTLZ_ZERO_UNDEF %0
+    S_ENDPGM 0, implicit %1
 ...
 
 ---
-name: ctlz_zero_undef_i32_v
+name: ctlz_zero_undef_s32_v
 legalized: true
 
 body: |
   bb.0:
     liveins: $vgpr0_vgpr1
-    ; CHECK-LABEL: name: ctlz_zero_undef_i32_v
+    ; CHECK-LABEL: name: ctlz_zero_undef_s32_v
     ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
-    ; CHECK: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:vgpr(s32) = G_CTLZ_ZERO_UNDEF [[COPY]]
+    ; CHECK: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:vgpr(s32) = G_CTLZ_ZERO_UNDEF [[COPY]](s32)
+    ; CHECK: S_ENDPGM 0, implicit [[CTLZ_ZERO_UNDEF]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = G_CTLZ_ZERO_UNDEF %0
+    S_ENDPGM 0, implicit %1
+...
+
+---
+name: ctlz_zero_undef_s64_s
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1
+    ; CHECK-LABEL: name: ctlz_zero_undef_s64_s
+    ; CHECK: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
+    ; CHECK: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:sgpr(s32) = G_CTLZ_ZERO_UNDEF [[COPY]](s64)
+    ; CHECK: S_ENDPGM 0, implicit [[CTLZ_ZERO_UNDEF]](s32)
+    %0:_(s64) = COPY $sgpr0_sgpr1
+    %1:_(s32) = G_CTLZ_ZERO_UNDEF %0
+    S_ENDPGM 0, implicit %1
+...
+
+---
+name: ctlz_zero_undef_s64_v
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+    ; CHECK-LABEL: name: ctlz_zero_undef_s64_v
+    ; CHECK: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
+    ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+    ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+    ; CHECK: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV1]](s32), [[C]]
+    ; CHECK: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:vgpr(s32) = G_CTLZ_ZERO_UNDEF [[UV]](s32)
+    ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 32
+    ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[CTLZ_ZERO_UNDEF]], [[C1]]
+    ; CHECK: [[CTLZ_ZERO_UNDEF1:%[0-9]+]]:vgpr(s32) = G_CTLZ_ZERO_UNDEF [[UV1]](s32)
+    ; CHECK: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[CTLZ_ZERO_UNDEF1]]
+    ; CHECK: S_ENDPGM 0, implicit [[SELECT]](s32)
+    %0:_(s64) = COPY $vgpr0_vgpr1
+    %1:_(s32) = G_CTLZ_ZERO_UNDEF %0
+    S_ENDPGM 0, implicit %1
 ...

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-cttz-zero-undef.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-cttz-zero-undef.mir
index 127c7fb8a84e..a07b595005c0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-cttz-zero-undef.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-cttz-zero-undef.mir
@@ -3,29 +3,72 @@
 # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
 
 ---
-name: cttz_zero_undef_i32_s
+name: cttz_zero_undef_s32_s
 legalized: true
 
 body: |
   bb.0:
     liveins: $sgpr0
-    ; CHECK-LABEL: name: cttz_zero_undef_i32_s
+    ; CHECK-LABEL: name: cttz_zero_undef_s32_s
     ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
-    ; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:sgpr(s32) = G_CTTZ_ZERO_UNDEF [[COPY]]
+    ; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:sgpr(s32) = G_CTTZ_ZERO_UNDEF [[COPY]](s32)
+    ; CHECK: S_ENDPGM 0, implicit [[CTTZ_ZERO_UNDEF]](s32)
     %0:_(s32) = COPY $sgpr0
     %1:_(s32) = G_CTTZ_ZERO_UNDEF %0
+    S_ENDPGM 0, implicit %1
 ...
 
 ---
-name: cttz_zero_undef_i32_v
+name: cttz_zero_undef_s32_v
 legalized: true
 
 body: |
   bb.0:
     liveins: $vgpr0_vgpr1
-    ; CHECK-LABEL: name: cttz_zero_undef_i32_v
+    ; CHECK-LABEL: name: cttz_zero_undef_s32_v
     ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
-    ; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:vgpr(s32) = G_CTTZ_ZERO_UNDEF [[COPY]]
+    ; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:vgpr(s32) = G_CTTZ_ZERO_UNDEF [[COPY]](s32)
+    ; CHECK: S_ENDPGM 0, implicit [[CTTZ_ZERO_UNDEF]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = G_CTTZ_ZERO_UNDEF %0
+    S_ENDPGM 0, implicit %1
+...
+
+---
+name: cttz_zero_undef_s64_s
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1
+    ; CHECK-LABEL: name: cttz_zero_undef_s64_s
+    ; CHECK: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
+    ; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:sgpr(s32) = G_CTTZ_ZERO_UNDEF [[COPY]](s64)
+    ; CHECK: S_ENDPGM 0, implicit [[CTTZ_ZERO_UNDEF]](s32)
+    %0:_(s64) = COPY $sgpr0_sgpr1
+    %1:_(s32) = G_CTTZ_ZERO_UNDEF %0
+    S_ENDPGM 0, implicit %1
+...
+
+---
+name: cttz_zero_undef_s64_v
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+    ; CHECK-LABEL: name: cttz_zero_undef_s64_v
+    ; CHECK: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
+    ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+    ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+    ; CHECK: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV]](s32), [[C]]
+    ; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:vgpr(s32) = G_CTTZ_ZERO_UNDEF [[UV1]](s32)
+    ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 32
+    ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[CTTZ_ZERO_UNDEF]], [[C1]]
+    ; CHECK: [[CTTZ_ZERO_UNDEF1:%[0-9]+]]:vgpr(s32) = G_CTTZ_ZERO_UNDEF [[UV]](s32)
+    ; CHECK: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[CTTZ_ZERO_UNDEF1]]
+    ; CHECK: S_ENDPGM 0, implicit [[SELECT]](s32)
+    %0:_(s64) = COPY $vgpr0_vgpr1
+    %1:_(s32) = G_CTTZ_ZERO_UNDEF %0
+    S_ENDPGM 0, implicit %1
 ...


        


More information about the llvm-commits mailing list