[llvm] 832c87b - [AMDGPU] Use S_BITCMP0_* to replace AND in optimizeCompareInstr

Stanislav Mekhanoshin via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 2 09:38:11 PDT 2021


Author: Stanislav Mekhanoshin
Date: 2021-09-02T09:38:01-07:00
New Revision: 832c87b4fbc820590704eaf02572dff762650a98

URL: https://github.com/llvm/llvm-project/commit/832c87b4fbc820590704eaf02572dff762650a98
DIFF: https://github.com/llvm/llvm-project/commit/832c87b4fbc820590704eaf02572dff762650a98.diff

LOG: [AMDGPU] Use S_BITCMP0_* to replace AND in optimizeCompareInstr

These can be used for reversed conditions if the result of the AND
is unused except in the compare (see the standalone sketch after the revision link below):

s_cmp_eq_u32 (s_and_b32 $src, 1), 0 => s_bitcmp0_b32 $src, 0
s_cmp_eq_i32 (s_and_b32 $src, 1), 0 => s_bitcmp0_b32 $src, 0
s_cmp_eq_u64 (s_and_b64 $src, 1), 0 => s_bitcmp0_b64 $src, 0
s_cmp_lg_u32 (s_and_b32 $src, 1), 1 => s_bitcmp0_b32 $src, 0
s_cmp_lg_i32 (s_and_b32 $src, 1), 1 => s_bitcmp0_b32 $src, 0
s_cmp_lg_u64 (s_and_b64 $src, 1), 1 => s_bitcmp0_b64 $src, 0

Differential Revision: https://reviews.llvm.org/D109099
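
For illustration only, here is a minimal standalone C++ sketch (not part of the patch; the
helper name selectBitcmpOpcode and the Ok flag are hypothetical) of how the reversed
condition is recognized and which S_BITCMP opcode it selects. It mirrors the logic added
to optimizeCmpAnd below, but omits the MachineIR checks the real code performs (single
non-debug use of the AND result, no SCC clobber between the AND and the compare):

    #include <cassert>
    #include <cstdint>

    // Illustrative stand-ins for the AMDGPU opcodes involved.
    enum Opcode { S_BITCMP0_B32, S_BITCMP1_B32, S_BITCMP0_B64, S_BITCMP1_B64 };

    // Hypothetical helper: CmpValue is the immediate the compare tests against,
    // ExpectedValue is what the non-reversed pattern expects, and IsReversable is
    // true only for the eq/lg style compares (ge/gt cannot be reversed this way).
    Opcode selectBitcmpOpcode(int64_t CmpValue, int64_t ExpectedValue,
                              unsigned SrcSize, bool IsReversable, bool &Ok) {
      bool IsReversedCC = false;
      Ok = true;
      if (CmpValue != ExpectedValue) {
        IsReversedCC = IsReversable && CmpValue == (ExpectedValue ^ 1);
        if (!IsReversedCC) {
          Ok = false;              // no rewrite possible
          return S_BITCMP1_B32;    // value is irrelevant when Ok is false
        }
      }
      return SrcSize == 32 ? (IsReversedCC ? S_BITCMP0_B32 : S_BITCMP1_B32)
                           : (IsReversedCC ? S_BITCMP0_B64 : S_BITCMP1_B64);
    }

    int main() {
      bool Ok;
      // s_cmp_eq_u32 (s_and_b32 $src, 1), 0 -> reversed, S_BITCMP0_B32 $src, 0.
      assert(selectBitcmpOpcode(0, 1, 32, /*IsReversable=*/true, Ok) ==
                 S_BITCMP0_B32 && Ok);
      // s_cmp_ge_u32 (s_and_b32 $src, 1), 0 -> not reversable, left alone.
      selectBitcmpOpcode(0, 1, 32, /*IsReversable=*/false, Ok);
      assert(!Ok);
      return 0;
    }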

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
    llvm/test/CodeGen/AMDGPU/optimize-compare.mir

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index d4f747dc9e8f..64ba869ed2b9 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -8009,7 +8009,8 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
 
   const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI,
                                this](int64_t ExpectedValue,
-                                     unsigned SrcSize) -> bool {
+                                     unsigned SrcSize,
+                                     bool IsReversable) -> bool {
     // s_cmp_eq_u32 (s_and_b32 $src, 1), 1 => s_and_b32 $src, 1
     // s_cmp_eq_i32 (s_and_b32 $src, 1), 1 => s_and_b32 $src, 1
     // s_cmp_ge_u32 (s_and_b32 $src, 1), 1 => s_and_b32 $src, 1
@@ -8023,9 +8024,22 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
     //
     // If result of the AND is unused except in the compare:
     // s_and_b(32|64) $src, 1 => s_bitcmp1_b(32|64) $src, 0
-
-    if (CmpValue != ExpectedValue)
-      return false;
+    //
+    // s_cmp_eq_u32 (s_and_b32 $src, 1), 0 => s_bitcmp0_b32 $src, 0
+    // s_cmp_eq_i32 (s_and_b32 $src, 1), 0 => s_bitcmp0_b32 $src, 0
+    // s_cmp_eq_u64 (s_and_b64 $src, 1), 0 => s_bitcmp0_b64 $src, 0
+    // s_cmp_lg_u32 (s_and_b32 $src, 1), 1 => s_bitcmp0_b32 $src, 0
+    // s_cmp_lg_i32 (s_and_b32 $src, 1), 1 => s_bitcmp0_b32 $src, 0
+    // s_cmp_lg_u64 (s_and_b64 $src, 1), 1 => s_bitcmp0_b64 $src, 0
+
+    bool IsReversedCC = false;
+    if (CmpValue != ExpectedValue) {
+      if (!IsReversable)
+        return false;
+      IsReversedCC = CmpValue == (ExpectedValue ^ 1);
+      if (!IsReversedCC)
+        return false;
+    }
 
     MachineInstr *Def = MRI->getUniqueVRegDef(SrcReg);
     if (!Def || Def->getParent() != CmpInstr.getParent())
@@ -8041,6 +8055,10 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
     else if (!Def->getOperand(2).isImm() || Def->getOperand(2).getImm() != 1)
       return false;
 
+    Register DefReg = Def->getOperand(0).getReg();
+    if (IsReversedCC && !MRI->hasOneNonDBGUse(DefReg))
+      return false;
+
     for (auto I = std::next(Def->getIterator()), E = CmpInstr.getIterator();
          I != E; ++I) {
       if (I->modifiesRegister(AMDGPU::SCC, &RI) ||
@@ -8052,17 +8070,20 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
     SccDef->setIsDead(false);
     CmpInstr.eraseFromParent();
 
-    if (!MRI->use_nodbg_empty(Def->getOperand(0).getReg()))
+    if (!MRI->use_nodbg_empty(DefReg)) {
+      assert(!IsReversedCC);
       return true;
+    }
 
     // Replace AND with unused result with a S_BITCMP.
     // TODO: If s_bitcmp can be used we are not limited to 1 and 0 but can
     //       process any power of 2.
     MachineBasicBlock *MBB = Def->getParent();
 
-    // TODO: Reverse conditions can use S_BITCMP0_*.
-    unsigned NewOpc = (SrcSize == 32) ? AMDGPU::S_BITCMP1_B32
-                                      : AMDGPU::S_BITCMP1_B64;
+    unsigned NewOpc = (SrcSize == 32) ? IsReversedCC ? AMDGPU::S_BITCMP0_B32
+                                                     : AMDGPU::S_BITCMP1_B32
+                                      : IsReversedCC ? AMDGPU::S_BITCMP0_B64
+                                                     : AMDGPU::S_BITCMP1_B64;
 
     BuildMI(*MBB, Def, Def->getDebugLoc(), get(NewOpc))
       .add(*SrcOp)
@@ -8077,26 +8098,28 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
     break;
   case AMDGPU::S_CMP_EQ_U32:
   case AMDGPU::S_CMP_EQ_I32:
-  case AMDGPU::S_CMP_GE_U32:
-  case AMDGPU::S_CMP_GE_I32:
   case AMDGPU::S_CMPK_EQ_U32:
   case AMDGPU::S_CMPK_EQ_I32:
+    return optimizeCmpAnd(1, 32, true);
+  case AMDGPU::S_CMP_GE_U32:
+  case AMDGPU::S_CMP_GE_I32:
   case AMDGPU::S_CMPK_GE_U32:
   case AMDGPU::S_CMPK_GE_I32:
-    return optimizeCmpAnd(1, 32);
+    return optimizeCmpAnd(1, 32, false);
   case AMDGPU::S_CMP_EQ_U64:
-    return optimizeCmpAnd(1, 64);
+    return optimizeCmpAnd(1, 64, true);
   case AMDGPU::S_CMP_LG_U32:
   case AMDGPU::S_CMP_LG_I32:
-  case AMDGPU::S_CMP_GT_U32:
-  case AMDGPU::S_CMP_GT_I32:
   case AMDGPU::S_CMPK_LG_U32:
   case AMDGPU::S_CMPK_LG_I32:
+    return optimizeCmpAnd(0, 32, true);
+  case AMDGPU::S_CMP_GT_U32:
+  case AMDGPU::S_CMP_GT_I32:
   case AMDGPU::S_CMPK_GT_U32:
   case AMDGPU::S_CMPK_GT_I32:
-    return optimizeCmpAnd(0, 32);
+    return optimizeCmpAnd(0, 32, false);
   case AMDGPU::S_CMP_LG_U64:
-    return optimizeCmpAnd(0, 64);
+    return optimizeCmpAnd(0, 64, true);
   }
 
   return false;

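As a sanity check outside of LLVM, a tiny standalone program (illustrative only, with an
arbitrary sample set) confirming the bit-level identities the new rewrites rely on: for any x,
((x & 1) == 0) and ((x & 1) != 1) are both exactly "bit 0 of x is clear", which is what
s_bitcmp0_b(32|64) $src, 0 writes to SCC:

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint64_t Samples[] = {0, 1, 2, 3, 0x7FFFFFFFull, 0x80000000ull,
                                  0xFFFFFFFFull, 0x123456789ABCDEF0ull, ~0ull};
      for (uint64_t X : Samples) {
        bool AndEqZero = (X & 1) == 0;        // s_cmp_eq_* (s_and_* $src, 1), 0
        bool AndLgOne  = (X & 1) != 1;        // s_cmp_lg_* (s_and_* $src, 1), 1
        bool Bit0Clear = ((X >> 0) & 1) == 0; // s_bitcmp0_b* $src, 0
        assert(AndEqZero == Bit0Clear && AndLgOne == Bit0Clear);
      }
      return 0;
    }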
diff --git a/llvm/test/CodeGen/AMDGPU/optimize-compare.mir b/llvm/test/CodeGen/AMDGPU/optimize-compare.mir
index d38bb221553e..b38a2252e379 100644
--- a/llvm/test/CodeGen/AMDGPU/optimize-compare.mir
+++ b/llvm/test/CodeGen/AMDGPU/optimize-compare.mir
@@ -408,14 +408,14 @@ body:             |
 ...
 
 ---
-name:            and_1_cmp_eq_0
+name:            and_1_cmp_eq_2
 body:             |
-  ; GCN-LABEL: name: and_1_cmp_eq_0
+  ; GCN-LABEL: name: and_1_cmp_eq_2
   ; GCN: bb.0:
   ; GCN:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
   ; GCN:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
   ; GCN:   [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def dead $scc
-  ; GCN:   S_CMP_EQ_U32 killed [[S_AND_B32_]], 0, implicit-def $scc
+  ; GCN:   S_CMP_EQ_U32 killed [[S_AND_B32_]], 2, implicit-def $scc
   ; GCN:   S_CBRANCH_SCC0 %bb.2, implicit $scc
   ; GCN:   S_BRANCH %bb.1
   ; GCN: bb.1:
@@ -428,7 +428,7 @@ body:             |
 
     %0:sreg_32 = COPY $sgpr0
     %1:sreg_32 = S_AND_B32 1, killed %0, implicit-def dead $scc
-    S_CMP_EQ_U32 killed %1:sreg_32, 0, implicit-def $scc
+    S_CMP_EQ_U32 killed %1:sreg_32, 2, implicit-def $scc
     S_CBRANCH_SCC0 %bb.2, implicit $scc
     S_BRANCH %bb.1
 
@@ -1047,3 +1047,296 @@ body:             |
     S_ENDPGM 0
 
 ...
+
+---
+name:            and_1_cmp_eq_u32_0
+body:             |
+  ; GCN-LABEL: name: and_1_cmp_eq_u32_0
+  ; GCN: bb.0:
+  ; GCN:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GCN:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GCN:   S_BITCMP0_B32 killed [[COPY]], 0, implicit-def $scc
+  ; GCN:   S_CBRANCH_SCC0 %bb.2, implicit $scc
+  ; GCN:   S_BRANCH %bb.1
+  ; GCN: bb.1:
+  ; GCN:   successors: %bb.2(0x80000000)
+  ; GCN: bb.2:
+  ; GCN:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1(0x40000000), %bb.2(0x40000000)
+    liveins: $sgpr0, $vgpr0_vgpr1
+
+    %0:sreg_32 = COPY $sgpr0
+    %1:sreg_32 = S_AND_B32 1, killed %0, implicit-def dead $scc
+    S_CMP_EQ_U32 killed %1:sreg_32, 0, implicit-def $scc
+    S_CBRANCH_SCC0 %bb.2, implicit $scc
+    S_BRANCH %bb.1
+
+  bb.1:
+    successors: %bb.2(0x80000000)
+
+  bb.2:
+    S_ENDPGM 0
+
+...
+
+---
+name:            and_1_cmp_eq_i32_0
+body:             |
+  ; GCN-LABEL: name: and_1_cmp_eq_i32_0
+  ; GCN: bb.0:
+  ; GCN:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GCN:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GCN:   S_BITCMP0_B32 killed [[COPY]], 0, implicit-def $scc
+  ; GCN:   S_CBRANCH_SCC0 %bb.2, implicit $scc
+  ; GCN:   S_BRANCH %bb.1
+  ; GCN: bb.1:
+  ; GCN:   successors: %bb.2(0x80000000)
+  ; GCN: bb.2:
+  ; GCN:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1(0x40000000), %bb.2(0x40000000)
+    liveins: $sgpr0, $vgpr0_vgpr1
+
+    %0:sreg_32 = COPY $sgpr0
+    %1:sreg_32 = S_AND_B32 1, killed %0, implicit-def dead $scc
+    S_CMP_EQ_I32 killed %1:sreg_32, 0, implicit-def $scc
+    S_CBRANCH_SCC0 %bb.2, implicit $scc
+    S_BRANCH %bb.1
+
+  bb.1:
+    successors: %bb.2(0x80000000)
+
+  bb.2:
+    S_ENDPGM 0
+
+...
+
+---
+name:            and_1_cmp_eq_u64_0
+body:             |
+  ; GCN-LABEL: name: and_1_cmp_eq_u64_0
+  ; GCN: bb.0:
+  ; GCN:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GCN:   [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+  ; GCN:   S_BITCMP0_B64 killed [[COPY]], 0, implicit-def $scc
+  ; GCN:   S_CBRANCH_SCC0 %bb.2, implicit $scc
+  ; GCN:   S_BRANCH %bb.1
+  ; GCN: bb.1:
+  ; GCN:   successors: %bb.2(0x80000000)
+  ; GCN: bb.2:
+  ; GCN:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1(0x40000000), %bb.2(0x40000000)
+    liveins: $sgpr0_sgpr1, $vgpr0_vgpr1
+
+    %0:sreg_64 = COPY $sgpr0_sgpr1
+    %1:sreg_64 = S_AND_B64 1, killed %0, implicit-def dead $scc
+    S_CMP_EQ_U64 killed %1:sreg_64, 0, implicit-def $scc
+    S_CBRANCH_SCC0 %bb.2, implicit $scc
+    S_BRANCH %bb.1
+
+  bb.1:
+    successors: %bb.2(0x80000000)
+
+  bb.2:
+    S_ENDPGM 0
+
+...
+
+---
+name:            and_1_cmp_lg_u32_1
+body:             |
+  ; GCN-LABEL: name: and_1_cmp_lg_u32_1
+  ; GCN: bb.0:
+  ; GCN:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GCN:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GCN:   S_BITCMP0_B32 killed [[COPY]], 0, implicit-def $scc
+  ; GCN:   S_CBRANCH_SCC0 %bb.2, implicit $scc
+  ; GCN:   S_BRANCH %bb.1
+  ; GCN: bb.1:
+  ; GCN:   successors: %bb.2(0x80000000)
+  ; GCN: bb.2:
+  ; GCN:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1(0x40000000), %bb.2(0x40000000)
+    liveins: $sgpr0, $vgpr0_vgpr1
+
+    %0:sreg_32 = COPY $sgpr0
+    %1:sreg_32 = S_AND_B32 1, killed %0, implicit-def dead $scc
+    S_CMP_LG_U32 killed %1:sreg_32, 1, implicit-def $scc
+    S_CBRANCH_SCC0 %bb.2, implicit $scc
+    S_BRANCH %bb.1
+
+  bb.1:
+    successors: %bb.2(0x80000000)
+
+  bb.2:
+    S_ENDPGM 0
+
+...
+
+---
+name:            and_1_cmp_lg_i32_1
+body:             |
+  ; GCN-LABEL: name: and_1_cmp_lg_i32_1
+  ; GCN: bb.0:
+  ; GCN:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GCN:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GCN:   S_BITCMP0_B32 killed [[COPY]], 0, implicit-def $scc
+  ; GCN:   S_CBRANCH_SCC0 %bb.2, implicit $scc
+  ; GCN:   S_BRANCH %bb.1
+  ; GCN: bb.1:
+  ; GCN:   successors: %bb.2(0x80000000)
+  ; GCN: bb.2:
+  ; GCN:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1(0x40000000), %bb.2(0x40000000)
+    liveins: $sgpr0, $vgpr0_vgpr1
+
+    %0:sreg_32 = COPY $sgpr0
+    %1:sreg_32 = S_AND_B32 1, killed %0, implicit-def dead $scc
+    S_CMP_LG_I32 killed %1:sreg_32, 1, implicit-def $scc
+    S_CBRANCH_SCC0 %bb.2, implicit $scc
+    S_BRANCH %bb.1
+
+  bb.1:
+    successors: %bb.2(0x80000000)
+
+  bb.2:
+    S_ENDPGM 0
+
+...
+
+---
+name:            and_1_cmp_lg_u64_1
+body:             |
+  ; GCN-LABEL: name: and_1_cmp_lg_u64_1
+  ; GCN: bb.0:
+  ; GCN:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GCN:   [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+  ; GCN:   S_BITCMP0_B64 killed [[COPY]], 0, implicit-def $scc
+  ; GCN:   S_CBRANCH_SCC0 %bb.2, implicit $scc
+  ; GCN:   S_BRANCH %bb.1
+  ; GCN: bb.1:
+  ; GCN:   successors: %bb.2(0x80000000)
+  ; GCN: bb.2:
+  ; GCN:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1(0x40000000), %bb.2(0x40000000)
+    liveins: $sgpr0_sgpr1, $vgpr0_vgpr1
+
+    %0:sreg_64 = COPY $sgpr0_sgpr1
+    %1:sreg_64 = S_AND_B64 1, killed %0, implicit-def dead $scc
+    S_CMP_LG_U64 killed %1:sreg_64, 1, implicit-def $scc
+    S_CBRANCH_SCC0 %bb.2, implicit $scc
+    S_BRANCH %bb.1
+
+  bb.1:
+    successors: %bb.2(0x80000000)
+
+  bb.2:
+    S_ENDPGM 0
+
+...
+
+---
+name:            and_1_cmp_eq_u32_0_used_and
+body:             |
+  ; GCN-LABEL: name: and_1_cmp_eq_u32_0_used_and
+  ; GCN: bb.0:
+  ; GCN:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GCN:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GCN:   [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def dead $scc
+  ; GCN:   S_CMP_EQ_U32 killed [[S_AND_B32_]], 0, implicit-def $scc
+  ; GCN:   S_CBRANCH_SCC0 %bb.2, implicit $scc
+  ; GCN:   S_BRANCH %bb.1
+  ; GCN: bb.1:
+  ; GCN:   successors: %bb.2(0x80000000)
+  ; GCN:   S_NOP 0, implicit [[S_AND_B32_]]
+  ; GCN: bb.2:
+  ; GCN:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1(0x40000000), %bb.2(0x40000000)
+    liveins: $sgpr0, $vgpr0_vgpr1
+
+    %0:sreg_32 = COPY $sgpr0
+    %1:sreg_32 = S_AND_B32 1, killed %0, implicit-def dead $scc
+    S_CMP_EQ_U32 killed %1:sreg_32, 0, implicit-def $scc
+    S_CBRANCH_SCC0 %bb.2, implicit $scc
+    S_BRANCH %bb.1
+
+  bb.1:
+    successors: %bb.2(0x80000000)
+
+    S_NOP 0, implicit %1
+  bb.2:
+    S_ENDPGM 0
+
+...
+
+---
+name:            and_1_cmp_ge_u32_0
+body:             |
+  ; GCN-LABEL: name: and_1_cmp_ge_u32_0
+  ; GCN: bb.0:
+  ; GCN:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GCN:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GCN:   [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def dead $scc
+  ; GCN:   S_CMP_GE_U32 killed [[S_AND_B32_]], 0, implicit-def $scc
+  ; GCN:   S_CBRANCH_SCC0 %bb.2, implicit $scc
+  ; GCN:   S_BRANCH %bb.1
+  ; GCN: bb.1:
+  ; GCN:   successors: %bb.2(0x80000000)
+  ; GCN: bb.2:
+  ; GCN:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1(0x40000000), %bb.2(0x40000000)
+    liveins: $sgpr0, $vgpr0_vgpr1
+
+    %0:sreg_32 = COPY $sgpr0
+    %1:sreg_32 = S_AND_B32 1, killed %0, implicit-def dead $scc
+    S_CMP_GE_U32 killed %1:sreg_32, 0, implicit-def $scc
+    S_CBRANCH_SCC0 %bb.2, implicit $scc
+    S_BRANCH %bb.1
+
+  bb.1:
+    successors: %bb.2(0x80000000)
+
+  bb.2:
+    S_ENDPGM 0
+
+...
+
+---
+name:            and_1_cmp_gt_u32_1
+body:             |
+  ; GCN-LABEL: name: and_1_cmp_gt_u32_1
+  ; GCN: bb.0:
+  ; GCN:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GCN:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GCN:   [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def dead $scc
+  ; GCN:   S_CMP_GT_U32 killed [[S_AND_B32_]], 1, implicit-def $scc
+  ; GCN:   S_CBRANCH_SCC0 %bb.2, implicit $scc
+  ; GCN:   S_BRANCH %bb.1
+  ; GCN: bb.1:
+  ; GCN:   successors: %bb.2(0x80000000)
+  ; GCN: bb.2:
+  ; GCN:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1(0x40000000), %bb.2(0x40000000)
+    liveins: $sgpr0, $vgpr0_vgpr1
+
+    %0:sreg_32 = COPY $sgpr0
+    %1:sreg_32 = S_AND_B32 1, killed %0, implicit-def dead $scc
+    S_CMP_GT_U32 killed %1:sreg_32, 1, implicit-def $scc
+    S_CBRANCH_SCC0 %bb.2, implicit $scc
+    S_BRANCH %bb.1
+
+  bb.1:
+    successors: %bb.2(0x80000000)
+
+  bb.2:
+    S_ENDPGM 0
+
+...
