[llvm] f3645c7 - [AMDGPU] Use S_BITCMP1_* to replace AND in optimizeCompareInstr

Stanislav Mekhanoshin via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 1 16:10:41 PDT 2021


Author: Stanislav Mekhanoshin
Date: 2021-09-01T15:59:12-07:00
New Revision: f3645c792a65454bb0e9a09236ad13de1e1bbaad

URL: https://github.com/llvm/llvm-project/commit/f3645c792a65454bb0e9a09236ad13de1e1bbaad
DIFF: https://github.com/llvm/llvm-project/commit/f3645c792a65454bb0e9a09236ad13de1e1bbaad.diff

LOG: [AMDGPU] Use S_BITCMP1_* to replace AND in optimizeCompareInstr

Differential Revision: https://reviews.llvm.org/D109082

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
    llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll
    llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll
    llvm/test/CodeGen/AMDGPU/optimize-compare.mir
    llvm/test/CodeGen/AMDGPU/select-i1.ll
    llvm/test/CodeGen/AMDGPU/trunc.ll
    llvm/test/CodeGen/AMDGPU/wave32.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index b3648a57d5cd..d4f747dc9e8f 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -8008,7 +8008,8 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
     return false;
 
   const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI,
-                               this](int64_t ExpectedValue) -> bool {
+                               this](int64_t ExpectedValue,
+                                     unsigned SrcSize) -> bool {
     // s_cmp_eq_u32 (s_and_b32 $src, 1), 1 => s_and_b32 $src, 1
     // s_cmp_eq_i32 (s_and_b32 $src, 1), 1 => s_and_b32 $src, 1
     // s_cmp_ge_u32 (s_and_b32 $src, 1), 1 => s_and_b32 $src, 1
@@ -8019,10 +8020,9 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
     // s_cmp_gt_u32 (s_and_b32 $src, 1), 0 => s_and_b32 $src, 1
     // s_cmp_gt_i32 (s_and_b32 $src, 1), 0 => s_and_b32 $src, 1
     // s_cmp_lg_u64 (s_and_b64 $src, 1), 0 => s_and_b64 $src, 1
-
-    // TODO: Fold this into s_bitcmp* if result of an AND is unused.
-    // TODO: If s_bitcmp can be used we are not limited to 1 and 0 but can
-    //       process any power of 2.
+    //
+    // If result of the AND is unused except in the compare:
+    // s_and_b(32|64) $src, 1 => s_bitcmp1_b(32|64) $src, 0
 
     if (CmpValue != ExpectedValue)
       return false;
@@ -8035,8 +8035,10 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
         Def->getOpcode() != AMDGPU::S_AND_B64)
       return false;
 
-    if ((!Def->getOperand(1).isImm() || Def->getOperand(1).getImm() != 1) &&
-        (!Def->getOperand(2).isImm() || Def->getOperand(2).getImm() != 1))
+    MachineOperand *SrcOp = &Def->getOperand(1);
+    if (SrcOp->isImm() && SrcOp->getImm() == 1)
+      SrcOp = &Def->getOperand(2);
+    else if (!Def->getOperand(2).isImm() || Def->getOperand(2).getImm() != 1)
       return false;
 
     for (auto I = std::next(Def->getIterator()), E = CmpInstr.getIterator();
@@ -8050,6 +8052,23 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
     SccDef->setIsDead(false);
     CmpInstr.eraseFromParent();
 
+    if (!MRI->use_nodbg_empty(Def->getOperand(0).getReg()))
+      return true;
+
+    // Replace AND with unused result with a S_BITCMP.
+    // TODO: If s_bitcmp can be used we are not limited to 1 and 0 but can
+    //       process any power of 2.
+    MachineBasicBlock *MBB = Def->getParent();
+
+    // TODO: Reverse conditions can use S_BITCMP0_*.
+    unsigned NewOpc = (SrcSize == 32) ? AMDGPU::S_BITCMP1_B32
+                                      : AMDGPU::S_BITCMP1_B64;
+
+    BuildMI(*MBB, Def, Def->getDebugLoc(), get(NewOpc))
+      .add(*SrcOp)
+      .addImm(0);
+    Def->eraseFromParent();
+
     return true;
   };
 
@@ -8060,22 +8079,24 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
   case AMDGPU::S_CMP_EQ_I32:
   case AMDGPU::S_CMP_GE_U32:
   case AMDGPU::S_CMP_GE_I32:
-  case AMDGPU::S_CMP_EQ_U64:
   case AMDGPU::S_CMPK_EQ_U32:
   case AMDGPU::S_CMPK_EQ_I32:
   case AMDGPU::S_CMPK_GE_U32:
   case AMDGPU::S_CMPK_GE_I32:
-    return optimizeCmpAnd(1);
+    return optimizeCmpAnd(1, 32);
+  case AMDGPU::S_CMP_EQ_U64:
+    return optimizeCmpAnd(1, 64);
   case AMDGPU::S_CMP_LG_U32:
   case AMDGPU::S_CMP_LG_I32:
   case AMDGPU::S_CMP_GT_U32:
   case AMDGPU::S_CMP_GT_I32:
-  case AMDGPU::S_CMP_LG_U64:
   case AMDGPU::S_CMPK_LG_U32:
   case AMDGPU::S_CMPK_LG_I32:
   case AMDGPU::S_CMPK_GT_U32:
   case AMDGPU::S_CMPK_GT_I32:
-    return optimizeCmpAnd(0);
+    return optimizeCmpAnd(0, 32);
+  case AMDGPU::S_CMP_LG_U64:
+    return optimizeCmpAnd(0, 64);
   }
 
   return false;

diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll
index 6038f83d5cfc..1bd1a22bf5a9 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll
@@ -396,7 +396,7 @@ define amdgpu_kernel void @select_add_lhs_const_i16(i1 %cond) {
 ; GCN-NEXT:    v_mov_b32_e32 v0, 0x83
 ; GCN-NEXT:    v_mov_b32_e32 v1, 0x80
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-NEXT:    s_and_b32 s0, 1, s0
+; GCN-NEXT:    s_bitcmp1_b32 s0, 0
 ; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
 ; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; GCN-NEXT:    flat_store_short v[0:1], v0

diff --git a/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll b/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll
index 4d9f1f2f8548..d102ddb025a1 100644
--- a/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll
+++ b/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll
@@ -174,7 +174,7 @@ define amdgpu_kernel void @v3i16_registers(i1 %cond) #0 {
 ; GCN-NEXT:    s_add_u32 s0, s0, s9
 ; GCN-NEXT:    s_addc_u32 s1, s1, 0
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-NEXT:    s_and_b32 s4, 1, s4
+; GCN-NEXT:    s_bitcmp1_b32 s4, 0
 ; GCN-NEXT:    s_cselect_b64 s[4:5], -1, 0
 ; GCN-NEXT:    s_and_b64 vcc, exec, s[4:5]
 ; GCN-NEXT:    s_mov_b32 s32, 0
@@ -219,7 +219,7 @@ define amdgpu_kernel void @v3f16_registers(i1 %cond) #0 {
 ; GCN-NEXT:    s_add_u32 s0, s0, s9
 ; GCN-NEXT:    s_addc_u32 s1, s1, 0
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-NEXT:    s_and_b32 s4, 1, s4
+; GCN-NEXT:    s_bitcmp1_b32 s4, 0
 ; GCN-NEXT:    s_cselect_b64 s[4:5], -1, 0
 ; GCN-NEXT:    s_and_b64 vcc, exec, s[4:5]
 ; GCN-NEXT:    s_mov_b32 s32, 0

diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll
index ddb01732e950..113f121b2519 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll
@@ -16,7 +16,7 @@ declare double @llvm.amdgcn.div.fmas.f64(double, double, double, i1) nounwind re
 ; VI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x70
 ; VI-DAG: s_load_dword [[SC:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x94
 
-; GCN-DAG: s_and_b32 [[AND_I1:s[0-9]+]], 1, s{{[0-9]+}}
+; GCN-DAG: s_bitcmp1_b32 s{{[0-9]+}}, 0
 
 ; GCN-DAG: v_mov_b32_e32 [[VC:v[0-9]+]], [[SC]]
 ; GCN-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]]

diff --git a/llvm/test/CodeGen/AMDGPU/optimize-compare.mir b/llvm/test/CodeGen/AMDGPU/optimize-compare.mir
index 6d3e30204c55..d38bb221553e 100644
--- a/llvm/test/CodeGen/AMDGPU/optimize-compare.mir
+++ b/llvm/test/CodeGen/AMDGPU/optimize-compare.mir
@@ -8,7 +8,40 @@ body:             |
   ; GCN: bb.0:
   ; GCN:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
   ; GCN:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GCN:   S_BITCMP1_B32 killed [[COPY]], 0, implicit-def $scc
+  ; GCN:   S_CBRANCH_SCC0 %bb.2, implicit $scc
+  ; GCN:   S_BRANCH %bb.1
+  ; GCN: bb.1:
+  ; GCN:   successors: %bb.2(0x80000000)
+  ; GCN: bb.2:
+  ; GCN:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1(0x40000000), %bb.2(0x40000000)
+    liveins: $sgpr0, $vgpr0_vgpr1
+
+    %0:sreg_32 = COPY $sgpr0
+    %1:sreg_32 = S_AND_B32 1, killed %0, implicit-def dead $scc
+    S_CMP_EQ_U32 killed %1:sreg_32, 1, implicit-def $scc
+    S_CBRANCH_SCC0 %bb.2, implicit $scc
+    S_BRANCH %bb.1
+
+  bb.1:
+    successors: %bb.2(0x80000000)
+
+  bb.2:
+    S_ENDPGM 0
+
+...
+
+---
+name:            and_1_cmp_eq_u32_1_used_and
+body:             |
+  ; GCN-LABEL: name: and_1_cmp_eq_u32_1_used_and
+  ; GCN: bb.0:
+  ; GCN:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GCN:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
   ; GCN:   [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def $scc
+  ; GCN:   S_NOP 0, implicit [[S_AND_B32_]]
   ; GCN:   S_CBRANCH_SCC0 %bb.2, implicit $scc
   ; GCN:   S_BRANCH %bb.1
   ; GCN: bb.1:
@@ -21,6 +54,7 @@ body:             |
 
     %0:sreg_32 = COPY $sgpr0
     %1:sreg_32 = S_AND_B32 1, killed %0, implicit-def dead $scc
+    S_NOP 0, implicit %1
     S_CMP_EQ_U32 killed %1:sreg_32, 1, implicit-def $scc
     S_CBRANCH_SCC0 %bb.2, implicit $scc
     S_BRANCH %bb.1
@@ -40,7 +74,7 @@ body:             |
   ; GCN: bb.0:
   ; GCN:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
   ; GCN:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GCN:   [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def $scc
+  ; GCN:   S_BITCMP1_B32 killed [[COPY]], 0, implicit-def $scc
   ; GCN:   S_CBRANCH_SCC0 %bb.2, implicit $scc
   ; GCN:   S_BRANCH %bb.1
   ; GCN: bb.1:
@@ -177,7 +211,7 @@ body:             |
   ; GCN: bb.0:
   ; GCN:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
   ; GCN:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GCN:   [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 killed [[COPY]], 1, implicit-def $scc
+  ; GCN:   S_BITCMP1_B32 killed [[COPY]], 0, implicit-def $scc
   ; GCN:   S_CBRANCH_SCC0 %bb.2, implicit $scc
   ; GCN:   S_BRANCH %bb.1
   ; GCN: bb.1:
@@ -446,7 +480,7 @@ body:             |
   ; GCN: bb.0:
   ; GCN:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
   ; GCN:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GCN:   [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def $scc
+  ; GCN:   S_BITCMP1_B32 killed [[COPY]], 0, implicit-def $scc
   ; GCN:   S_CBRANCH_SCC0 %bb.2, implicit $scc
   ; GCN:   S_BRANCH %bb.1
   ; GCN: bb.1:
@@ -478,7 +512,7 @@ body:             |
   ; GCN: bb.0:
   ; GCN:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
   ; GCN:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GCN:   [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def $scc
+  ; GCN:   S_BITCMP1_B32 killed [[COPY]], 0, implicit-def $scc
   ; GCN:   S_CBRANCH_SCC0 %bb.2, implicit $scc
   ; GCN:   S_BRANCH %bb.1
   ; GCN: bb.1:
@@ -510,7 +544,7 @@ body:             |
   ; GCN: bb.0:
   ; GCN:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
   ; GCN:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GCN:   [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def $scc
+  ; GCN:   S_BITCMP1_B32 killed [[COPY]], 0, implicit-def $scc
   ; GCN:   S_CBRANCH_SCC0 %bb.2, implicit $scc
   ; GCN:   S_BRANCH %bb.1
   ; GCN: bb.1:
@@ -542,7 +576,7 @@ body:             |
   ; GCN: bb.0:
   ; GCN:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
   ; GCN:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GCN:   [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def $scc
+  ; GCN:   S_BITCMP1_B32 killed [[COPY]], 0, implicit-def $scc
   ; GCN:   S_CBRANCH_SCC0 %bb.2, implicit $scc
   ; GCN:   S_BRANCH %bb.1
   ; GCN: bb.1:
@@ -607,7 +641,7 @@ body:             |
   ; GCN: bb.0:
   ; GCN:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
   ; GCN:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GCN:   [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def $scc
+  ; GCN:   S_BITCMP1_B32 killed [[COPY]], 0, implicit-def $scc
   ; GCN:   S_CBRANCH_SCC0 %bb.2, implicit $scc
   ; GCN:   S_BRANCH %bb.1
   ; GCN: bb.1:
@@ -639,7 +673,7 @@ body:             |
   ; GCN: bb.0:
   ; GCN:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
   ; GCN:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GCN:   [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def $scc
+  ; GCN:   S_BITCMP1_B32 killed [[COPY]], 0, implicit-def $scc
   ; GCN:   S_CBRANCH_SCC0 %bb.2, implicit $scc
   ; GCN:   S_BRANCH %bb.1
   ; GCN: bb.1:
@@ -671,7 +705,7 @@ body:             |
   ; GCN: bb.0:
   ; GCN:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
   ; GCN:   [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
-  ; GCN:   [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 1, killed [[COPY]], implicit-def $scc
+  ; GCN:   S_BITCMP1_B64 killed [[COPY]], 0, implicit-def $scc
   ; GCN:   S_CBRANCH_SCC0 %bb.2, implicit $scc
   ; GCN:   S_BRANCH %bb.1
   ; GCN: bb.1:
@@ -703,7 +737,7 @@ body:             |
   ; GCN: bb.0:
   ; GCN:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
   ; GCN:   [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
-  ; GCN:   [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 1, killed [[COPY]], implicit-def $scc
+  ; GCN:   S_BITCMP1_B64 killed [[COPY]], 0, implicit-def $scc
   ; GCN:   S_CBRANCH_SCC0 %bb.2, implicit $scc
   ; GCN:   S_BRANCH %bb.1
   ; GCN: bb.1:
@@ -735,7 +769,7 @@ body:             |
   ; GCN: bb.0:
   ; GCN:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
   ; GCN:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GCN:   [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def $scc
+  ; GCN:   S_BITCMP1_B32 killed [[COPY]], 0, implicit-def $scc
   ; GCN:   S_CBRANCH_SCC0 %bb.2, implicit $scc
   ; GCN:   S_BRANCH %bb.1
   ; GCN: bb.1:
@@ -767,7 +801,7 @@ body:             |
   ; GCN: bb.0:
   ; GCN:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
   ; GCN:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GCN:   [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def $scc
+  ; GCN:   S_BITCMP1_B32 killed [[COPY]], 0, implicit-def $scc
   ; GCN:   S_CBRANCH_SCC0 %bb.2, implicit $scc
   ; GCN:   S_BRANCH %bb.1
   ; GCN: bb.1:
@@ -799,7 +833,7 @@ body:             |
   ; GCN: bb.0:
   ; GCN:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
   ; GCN:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GCN:   [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def $scc
+  ; GCN:   S_BITCMP1_B32 killed [[COPY]], 0, implicit-def $scc
   ; GCN:   S_CBRANCH_SCC0 %bb.2, implicit $scc
   ; GCN:   S_BRANCH %bb.1
   ; GCN: bb.1:
@@ -831,7 +865,7 @@ body:             |
   ; GCN: bb.0:
   ; GCN:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
   ; GCN:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GCN:   [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def $scc
+  ; GCN:   S_BITCMP1_B32 killed [[COPY]], 0, implicit-def $scc
   ; GCN:   S_CBRANCH_SCC0 %bb.2, implicit $scc
   ; GCN:   S_BRANCH %bb.1
   ; GCN: bb.1:
@@ -863,7 +897,7 @@ body:             |
   ; GCN: bb.0:
   ; GCN:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
   ; GCN:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GCN:   [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def $scc
+  ; GCN:   S_BITCMP1_B32 killed [[COPY]], 0, implicit-def $scc
   ; GCN:   S_CBRANCH_SCC0 %bb.2, implicit $scc
   ; GCN:   S_BRANCH %bb.1
   ; GCN: bb.1:
@@ -895,7 +929,7 @@ body:             |
   ; GCN: bb.0:
   ; GCN:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
   ; GCN:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GCN:   [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def $scc
+  ; GCN:   S_BITCMP1_B32 killed [[COPY]], 0, implicit-def $scc
   ; GCN:   S_CBRANCH_SCC0 %bb.2, implicit $scc
   ; GCN:   S_BRANCH %bb.1
   ; GCN: bb.1:
@@ -927,7 +961,7 @@ body:             |
   ; GCN: bb.0:
   ; GCN:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
   ; GCN:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GCN:   [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def $scc
+  ; GCN:   S_BITCMP1_B32 killed [[COPY]], 0, implicit-def $scc
   ; GCN:   S_CBRANCH_SCC0 %bb.2, implicit $scc
   ; GCN:   S_BRANCH %bb.1
   ; GCN: bb.1:
@@ -959,7 +993,7 @@ body:             |
   ; GCN: bb.0:
   ; GCN:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
   ; GCN:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GCN:   [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def $scc
+  ; GCN:   S_BITCMP1_B32 killed [[COPY]], 0, implicit-def $scc
   ; GCN:   S_CBRANCH_SCC0 %bb.2, implicit $scc
   ; GCN:   S_BRANCH %bb.1
   ; GCN: bb.1:
@@ -983,3 +1017,33 @@ body:             |
     S_ENDPGM 0
 
 ...
+
+---
+name:            and_1_cmp_eq_u32_1_imm_src
+body:             |
+  ; GCN-LABEL: name: and_1_cmp_eq_u32_1_imm_src
+  ; GCN: bb.0:
+  ; GCN:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GCN:   S_BITCMP1_B32 11, 0, implicit-def $scc
+  ; GCN:   S_CBRANCH_SCC0 %bb.2, implicit $scc
+  ; GCN:   S_BRANCH %bb.1
+  ; GCN: bb.1:
+  ; GCN:   successors: %bb.2(0x80000000)
+  ; GCN: bb.2:
+  ; GCN:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1(0x40000000), %bb.2(0x40000000)
+    liveins: $sgpr0, $vgpr0_vgpr1
+
+    %0:sreg_32 = S_AND_B32 1, 11, implicit-def dead $scc
+    S_CMP_EQ_U32 killed %0:sreg_32, 1, implicit-def $scc
+    S_CBRANCH_SCC0 %bb.2, implicit $scc
+    S_BRANCH %bb.1
+
+  bb.1:
+    successors: %bb.2(0x80000000)
+
+  bb.2:
+    S_ENDPGM 0
+
+...

diff --git a/llvm/test/CodeGen/AMDGPU/select-i1.ll b/llvm/test/CodeGen/AMDGPU/select-i1.ll
index 479c103d1924..1024f06608d7 100644
--- a/llvm/test/CodeGen/AMDGPU/select-i1.ll
+++ b/llvm/test/CodeGen/AMDGPU/select-i1.ll
@@ -15,7 +15,7 @@ define amdgpu_kernel void @select_i1(i1 addrspace(1)* %out, i32 %cond, i1 %a, i1
 
 ; GCN-LABEL: {{^}}s_minmax_i1:
 ; GCN: s_load_dword [[LOAD:s[0-9]+]],
-; GCN: s_and_b32 [[COND:s[0-9]+]], 1, [[LOAD]]
+; GCN: s_bitcmp1_b32 [[LOAD]], 0
 ; GCN: s_cselect_b64 vcc, -1, 0
 ; GCN-DAG: s_lshr_b32 [[A:s[0-9]+]], [[LOAD]], 8
 ; GCN-DAG: s_lshr_b32 [[B:s[0-9]+]], [[LOAD]], 16

diff --git a/llvm/test/CodeGen/AMDGPU/trunc.ll b/llvm/test/CodeGen/AMDGPU/trunc.ll
index 38841c4bb900..4cb532a1928b 100644
--- a/llvm/test/CodeGen/AMDGPU/trunc.ll
+++ b/llvm/test/CodeGen/AMDGPU/trunc.ll
@@ -96,7 +96,7 @@ define amdgpu_kernel void @sgpr_trunc_i32_to_i1(i32 addrspace(1)* %out, i32 %a)
 ; GCN-LABEL: {{^}}s_trunc_i64_to_i1:
 ; SI: s_load_dwordx2 s{{\[}}[[SLO:[0-9]+]]:{{[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0x13
 ; VI: s_load_dwordx2 s{{\[}}[[SLO:[0-9]+]]:{{[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0x4c
-; GCN: s_and_b32 [[MASKED:s[0-9]+]], 1, s[[SLO]]
+; GCN: s_bitcmp1_b32 s[[SLO]], 0
 ; SI: s_cselect_b64 s{{\[}}[[VLO:[0-9]+]]:[[VHI:[0-9]+]]], -1, 0
 ; SI: v_cndmask_b32_e64 {{v[0-9]+}}, -12, 63, s{{\[}}[[VLO]]:[[VHI]]]
 ; VI: s_cselect_b32 {{s[0-9]+}}, 63, -12

diff --git a/llvm/test/CodeGen/AMDGPU/wave32.ll b/llvm/test/CodeGen/AMDGPU/wave32.ll
index 17200309bf9f..a9fc03a6874d 100644
--- a/llvm/test/CodeGen/AMDGPU/wave32.ll
+++ b/llvm/test/CodeGen/AMDGPU/wave32.ll
@@ -418,7 +418,7 @@ define i64 @test_mad_u64_u32(i32 %arg0, i32 %arg1, i64 %arg2) #0 {
 }
 
 ; GCN-LABEL: {{^}}test_div_fmas_f32:
-; GCN:     s_and_b32 s{{[0-9]+}}, 1
+; GCN:     s_bitcmp1_b32 s{{[0-9]+}}, 0
 ; GFX1032: s_cselect_b32 vcc_lo, -1, 0
 ; GFX1064: s_cselect_b64 vcc, -1, 0
 ; GCN:     v_div_fmas_f32 v{{[0-9]+}}, {{[vs][0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
@@ -429,7 +429,7 @@ define amdgpu_kernel void @test_div_fmas_f32(float addrspace(1)* %out, float %a,
 }
 
 ; GCN-LABEL: {{^}}test_div_fmas_f64:
-; GCN: s_and_b32 s{{[0-9]+}}, 1
+; GCN:     s_bitcmp1_b32 s{{[0-9]+}}, 0
 ; GFX1032: s_cselect_b32 vcc_lo, -1, 0
 ; GFX1064: s_cselect_b64 vcc, -1, 0
 ; GCN-DAG: v_div_fmas_f64 v[{{[0-9:]+}}], {{[vs]}}[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}]


        


More information about the llvm-commits mailing list