[llvm] 832c87b - [AMDGPU] Use S_BITCMP0_* to replace AND in optimizeCompareInstr
Stanislav Mekhanoshin via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 2 09:38:11 PDT 2021
Author: Stanislav Mekhanoshin
Date: 2021-09-02T09:38:01-07:00
New Revision: 832c87b4fbc820590704eaf02572dff762650a98
URL: https://github.com/llvm/llvm-project/commit/832c87b4fbc820590704eaf02572dff762650a98
DIFF: https://github.com/llvm/llvm-project/commit/832c87b4fbc820590704eaf02572dff762650a98.diff
LOG: [AMDGPU] Use S_BITCMP0_* to replace AND in optimizeCompareInstr
These can be used for reversed conditions if the result of the AND
is unused except in the compare:
s_cmp_eq_u32 (s_and_b32 $src, 1), 0 => s_bitcmp0_b32 $src, 0
s_cmp_eq_i32 (s_and_b32 $src, 1), 0 => s_bitcmp0_b32 $src, 0
s_cmp_eq_u64 (s_and_b64 $src, 1), 0 => s_bitcmp0_b64 $src, 0
s_cmp_lg_u32 (s_and_b32 $src, 1), 1 => s_bitcmp0_b32 $src, 0
s_cmp_lg_i32 (s_and_b32 $src, 1), 1 => s_bitcmp0_b32 $src, 0
s_cmp_lg_u64 (s_and_b64 $src, 1), 1 => s_bitcmp0_b64 $src, 0
Differential Revision: https://reviews.llvm.org/D109099
Added:
Modified:
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/test/CodeGen/AMDGPU/optimize-compare.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index d4f747dc9e8f..64ba869ed2b9 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -8009,7 +8009,8 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI,
this](int64_t ExpectedValue,
- unsigned SrcSize) -> bool {
+ unsigned SrcSize,
+ bool IsReversable) -> bool {
// s_cmp_eq_u32 (s_and_b32 $src, 1), 1 => s_and_b32 $src, 1
// s_cmp_eq_i32 (s_and_b32 $src, 1), 1 => s_and_b32 $src, 1
// s_cmp_ge_u32 (s_and_b32 $src, 1), 1 => s_and_b32 $src, 1
@@ -8023,9 +8024,22 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
//
// If result of the AND is unused except in the compare:
// s_and_b(32|64) $src, 1 => s_bitcmp1_b(32|64) $src, 0
-
- if (CmpValue != ExpectedValue)
- return false;
+ //
+ // s_cmp_eq_u32 (s_and_b32 $src, 1), 0 => s_bitcmp0_b32 $src, 0
+ // s_cmp_eq_i32 (s_and_b32 $src, 1), 0 => s_bitcmp0_b32 $src, 0
+ // s_cmp_eq_u64 (s_and_b64 $src, 1), 0 => s_bitcmp0_b64 $src, 0
+ // s_cmp_lg_u32 (s_and_b32 $src, 1), 1 => s_bitcmp0_b32 $src, 0
+ // s_cmp_lg_i32 (s_and_b32 $src, 1), 1 => s_bitcmp0_b32 $src, 0
+ // s_cmp_lg_u64 (s_and_b64 $src, 1), 1 => s_bitcmp0_b64 $src, 0
+
+ bool IsReversedCC = false;
+ if (CmpValue != ExpectedValue) {
+ if (!IsReversable)
+ return false;
+ IsReversedCC = CmpValue == (ExpectedValue ^ 1);
+ if (!IsReversedCC)
+ return false;
+ }
MachineInstr *Def = MRI->getUniqueVRegDef(SrcReg);
if (!Def || Def->getParent() != CmpInstr.getParent())
@@ -8041,6 +8055,10 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
else if (!Def->getOperand(2).isImm() || Def->getOperand(2).getImm() != 1)
return false;
+ Register DefReg = Def->getOperand(0).getReg();
+ if (IsReversedCC && !MRI->hasOneNonDBGUse(DefReg))
+ return false;
+
for (auto I = std::next(Def->getIterator()), E = CmpInstr.getIterator();
I != E; ++I) {
if (I->modifiesRegister(AMDGPU::SCC, &RI) ||
@@ -8052,17 +8070,20 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
SccDef->setIsDead(false);
CmpInstr.eraseFromParent();
- if (!MRI->use_nodbg_empty(Def->getOperand(0).getReg()))
+ if (!MRI->use_nodbg_empty(DefReg)) {
+ assert(!IsReversedCC);
return true;
+ }
// Replace AND with unused result with a S_BITCMP.
// TODO: If s_bitcmp can be used we are not limited to 1 and 0 but can
// process any power of 2.
MachineBasicBlock *MBB = Def->getParent();
- // TODO: Reverse conditions can use S_BITCMP0_*.
- unsigned NewOpc = (SrcSize == 32) ? AMDGPU::S_BITCMP1_B32
- : AMDGPU::S_BITCMP1_B64;
+ unsigned NewOpc = (SrcSize == 32) ? IsReversedCC ? AMDGPU::S_BITCMP0_B32
+ : AMDGPU::S_BITCMP1_B32
+ : IsReversedCC ? AMDGPU::S_BITCMP0_B64
+ : AMDGPU::S_BITCMP1_B64;
BuildMI(*MBB, Def, Def->getDebugLoc(), get(NewOpc))
.add(*SrcOp)
@@ -8077,26 +8098,28 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
break;
case AMDGPU::S_CMP_EQ_U32:
case AMDGPU::S_CMP_EQ_I32:
- case AMDGPU::S_CMP_GE_U32:
- case AMDGPU::S_CMP_GE_I32:
case AMDGPU::S_CMPK_EQ_U32:
case AMDGPU::S_CMPK_EQ_I32:
+ return optimizeCmpAnd(1, 32, true);
+ case AMDGPU::S_CMP_GE_U32:
+ case AMDGPU::S_CMP_GE_I32:
case AMDGPU::S_CMPK_GE_U32:
case AMDGPU::S_CMPK_GE_I32:
- return optimizeCmpAnd(1, 32);
+ return optimizeCmpAnd(1, 32, false);
case AMDGPU::S_CMP_EQ_U64:
- return optimizeCmpAnd(1, 64);
+ return optimizeCmpAnd(1, 64, true);
case AMDGPU::S_CMP_LG_U32:
case AMDGPU::S_CMP_LG_I32:
- case AMDGPU::S_CMP_GT_U32:
- case AMDGPU::S_CMP_GT_I32:
case AMDGPU::S_CMPK_LG_U32:
case AMDGPU::S_CMPK_LG_I32:
+ return optimizeCmpAnd(0, 32, true);
+ case AMDGPU::S_CMP_GT_U32:
+ case AMDGPU::S_CMP_GT_I32:
case AMDGPU::S_CMPK_GT_U32:
case AMDGPU::S_CMPK_GT_I32:
- return optimizeCmpAnd(0, 32);
+ return optimizeCmpAnd(0, 32, false);
case AMDGPU::S_CMP_LG_U64:
- return optimizeCmpAnd(0, 64);
+ return optimizeCmpAnd(0, 64, true);
}
return false;
diff --git a/llvm/test/CodeGen/AMDGPU/optimize-compare.mir b/llvm/test/CodeGen/AMDGPU/optimize-compare.mir
index d38bb221553e..b38a2252e379 100644
--- a/llvm/test/CodeGen/AMDGPU/optimize-compare.mir
+++ b/llvm/test/CodeGen/AMDGPU/optimize-compare.mir
@@ -408,14 +408,14 @@ body: |
...
---
-name: and_1_cmp_eq_0
+name: and_1_cmp_eq_2
body: |
- ; GCN-LABEL: name: and_1_cmp_eq_0
+ ; GCN-LABEL: name: and_1_cmp_eq_2
; GCN: bb.0:
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def dead $scc
- ; GCN: S_CMP_EQ_U32 killed [[S_AND_B32_]], 0, implicit-def $scc
+ ; GCN: S_CMP_EQ_U32 killed [[S_AND_B32_]], 2, implicit-def $scc
; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
; GCN: S_BRANCH %bb.1
; GCN: bb.1:
@@ -428,7 +428,7 @@ body: |
%0:sreg_32 = COPY $sgpr0
%1:sreg_32 = S_AND_B32 1, killed %0, implicit-def dead $scc
- S_CMP_EQ_U32 killed %1:sreg_32, 0, implicit-def $scc
+ S_CMP_EQ_U32 killed %1:sreg_32, 2, implicit-def $scc
S_CBRANCH_SCC0 %bb.2, implicit $scc
S_BRANCH %bb.1
@@ -1047,3 +1047,296 @@ body: |
S_ENDPGM 0
...
+
+---
+name: and_1_cmp_eq_u32_0
+body: |
+ ; GCN-LABEL: name: and_1_cmp_eq_u32_0
+ ; GCN: bb.0:
+ ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GCN: S_BITCMP0_B32 killed [[COPY]], 0, implicit-def $scc
+ ; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
+ ; GCN: S_BRANCH %bb.1
+ ; GCN: bb.1:
+ ; GCN: successors: %bb.2(0x80000000)
+ ; GCN: bb.2:
+ ; GCN: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ liveins: $sgpr0, $vgpr0_vgpr1
+
+ %0:sreg_32 = COPY $sgpr0
+ %1:sreg_32 = S_AND_B32 1, killed %0, implicit-def dead $scc
+ S_CMP_EQ_U32 killed %1:sreg_32, 0, implicit-def $scc
+ S_CBRANCH_SCC0 %bb.2, implicit $scc
+ S_BRANCH %bb.1
+
+ bb.1:
+ successors: %bb.2(0x80000000)
+
+ bb.2:
+ S_ENDPGM 0
+
+...
+
+---
+name: and_1_cmp_eq_i32_0
+body: |
+ ; GCN-LABEL: name: and_1_cmp_eq_i32_0
+ ; GCN: bb.0:
+ ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GCN: S_BITCMP0_B32 killed [[COPY]], 0, implicit-def $scc
+ ; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
+ ; GCN: S_BRANCH %bb.1
+ ; GCN: bb.1:
+ ; GCN: successors: %bb.2(0x80000000)
+ ; GCN: bb.2:
+ ; GCN: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ liveins: $sgpr0, $vgpr0_vgpr1
+
+ %0:sreg_32 = COPY $sgpr0
+ %1:sreg_32 = S_AND_B32 1, killed %0, implicit-def dead $scc
+ S_CMP_EQ_I32 killed %1:sreg_32, 0, implicit-def $scc
+ S_CBRANCH_SCC0 %bb.2, implicit $scc
+ S_BRANCH %bb.1
+
+ bb.1:
+ successors: %bb.2(0x80000000)
+
+ bb.2:
+ S_ENDPGM 0
+
+...
+
+---
+name: and_1_cmp_eq_u64_0
+body: |
+ ; GCN-LABEL: name: and_1_cmp_eq_u64_0
+ ; GCN: bb.0:
+ ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; GCN: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GCN: S_BITCMP0_B64 killed [[COPY]], 0, implicit-def $scc
+ ; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
+ ; GCN: S_BRANCH %bb.1
+ ; GCN: bb.1:
+ ; GCN: successors: %bb.2(0x80000000)
+ ; GCN: bb.2:
+ ; GCN: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ liveins: $sgpr0_sgpr1, $vgpr0_vgpr1
+
+ %0:sreg_64 = COPY $sgpr0_sgpr1
+ %1:sreg_64 = S_AND_B64 1, killed %0, implicit-def dead $scc
+ S_CMP_EQ_U64 killed %1:sreg_64, 0, implicit-def $scc
+ S_CBRANCH_SCC0 %bb.2, implicit $scc
+ S_BRANCH %bb.1
+
+ bb.1:
+ successors: %bb.2(0x80000000)
+
+ bb.2:
+ S_ENDPGM 0
+
+...
+
+---
+name: and_1_cmp_lg_u32_1
+body: |
+ ; GCN-LABEL: name: and_1_cmp_lg_u32_1
+ ; GCN: bb.0:
+ ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GCN: S_BITCMP0_B32 killed [[COPY]], 0, implicit-def $scc
+ ; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
+ ; GCN: S_BRANCH %bb.1
+ ; GCN: bb.1:
+ ; GCN: successors: %bb.2(0x80000000)
+ ; GCN: bb.2:
+ ; GCN: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ liveins: $sgpr0, $vgpr0_vgpr1
+
+ %0:sreg_32 = COPY $sgpr0
+ %1:sreg_32 = S_AND_B32 1, killed %0, implicit-def dead $scc
+ S_CMP_LG_U32 killed %1:sreg_32, 1, implicit-def $scc
+ S_CBRANCH_SCC0 %bb.2, implicit $scc
+ S_BRANCH %bb.1
+
+ bb.1:
+ successors: %bb.2(0x80000000)
+
+ bb.2:
+ S_ENDPGM 0
+
+...
+
+---
+name: and_1_cmp_lg_i32_1
+body: |
+ ; GCN-LABEL: name: and_1_cmp_lg_i32_1
+ ; GCN: bb.0:
+ ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GCN: S_BITCMP0_B32 killed [[COPY]], 0, implicit-def $scc
+ ; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
+ ; GCN: S_BRANCH %bb.1
+ ; GCN: bb.1:
+ ; GCN: successors: %bb.2(0x80000000)
+ ; GCN: bb.2:
+ ; GCN: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ liveins: $sgpr0, $vgpr0_vgpr1
+
+ %0:sreg_32 = COPY $sgpr0
+ %1:sreg_32 = S_AND_B32 1, killed %0, implicit-def dead $scc
+ S_CMP_LG_I32 killed %1:sreg_32, 1, implicit-def $scc
+ S_CBRANCH_SCC0 %bb.2, implicit $scc
+ S_BRANCH %bb.1
+
+ bb.1:
+ successors: %bb.2(0x80000000)
+
+ bb.2:
+ S_ENDPGM 0
+
+...
+
+---
+name: and_1_cmp_lg_u64_1
+body: |
+ ; GCN-LABEL: name: and_1_cmp_lg_u64_1
+ ; GCN: bb.0:
+ ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; GCN: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GCN: S_BITCMP0_B64 killed [[COPY]], 0, implicit-def $scc
+ ; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
+ ; GCN: S_BRANCH %bb.1
+ ; GCN: bb.1:
+ ; GCN: successors: %bb.2(0x80000000)
+ ; GCN: bb.2:
+ ; GCN: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ liveins: $sgpr0_sgpr1, $vgpr0_vgpr1
+
+ %0:sreg_64 = COPY $sgpr0_sgpr1
+ %1:sreg_64 = S_AND_B64 1, killed %0, implicit-def dead $scc
+ S_CMP_LG_U64 killed %1:sreg_64, 1, implicit-def $scc
+ S_CBRANCH_SCC0 %bb.2, implicit $scc
+ S_BRANCH %bb.1
+
+ bb.1:
+ successors: %bb.2(0x80000000)
+
+ bb.2:
+ S_ENDPGM 0
+
+...
+
+---
+name: and_1_cmp_eq_u32_0_used_and
+body: |
+ ; GCN-LABEL: name: and_1_cmp_eq_u32_0_used_and
+ ; GCN: bb.0:
+ ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def dead $scc
+ ; GCN: S_CMP_EQ_U32 killed [[S_AND_B32_]], 0, implicit-def $scc
+ ; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
+ ; GCN: S_BRANCH %bb.1
+ ; GCN: bb.1:
+ ; GCN: successors: %bb.2(0x80000000)
+ ; GCN: S_NOP 0, implicit [[S_AND_B32_]]
+ ; GCN: bb.2:
+ ; GCN: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ liveins: $sgpr0, $vgpr0_vgpr1
+
+ %0:sreg_32 = COPY $sgpr0
+ %1:sreg_32 = S_AND_B32 1, killed %0, implicit-def dead $scc
+ S_CMP_EQ_U32 killed %1:sreg_32, 0, implicit-def $scc
+ S_CBRANCH_SCC0 %bb.2, implicit $scc
+ S_BRANCH %bb.1
+
+ bb.1:
+ successors: %bb.2(0x80000000)
+
+ S_NOP 0, implicit %1
+ bb.2:
+ S_ENDPGM 0
+
+...
+
+---
+name: and_1_cmp_ge_u32_0
+body: |
+ ; GCN-LABEL: name: and_1_cmp_ge_u32_0
+ ; GCN: bb.0:
+ ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def dead $scc
+ ; GCN: S_CMP_GE_U32 killed [[S_AND_B32_]], 0, implicit-def $scc
+ ; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
+ ; GCN: S_BRANCH %bb.1
+ ; GCN: bb.1:
+ ; GCN: successors: %bb.2(0x80000000)
+ ; GCN: bb.2:
+ ; GCN: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ liveins: $sgpr0, $vgpr0_vgpr1
+
+ %0:sreg_32 = COPY $sgpr0
+ %1:sreg_32 = S_AND_B32 1, killed %0, implicit-def dead $scc
+ S_CMP_GE_U32 killed %1:sreg_32, 0, implicit-def $scc
+ S_CBRANCH_SCC0 %bb.2, implicit $scc
+ S_BRANCH %bb.1
+
+ bb.1:
+ successors: %bb.2(0x80000000)
+
+ bb.2:
+ S_ENDPGM 0
+
+...
+
+---
+name: and_1_cmp_gt_u32_1
+body: |
+ ; GCN-LABEL: name: and_1_cmp_gt_u32_1
+ ; GCN: bb.0:
+ ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def dead $scc
+ ; GCN: S_CMP_GT_U32 killed [[S_AND_B32_]], 1, implicit-def $scc
+ ; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
+ ; GCN: S_BRANCH %bb.1
+ ; GCN: bb.1:
+ ; GCN: successors: %bb.2(0x80000000)
+ ; GCN: bb.2:
+ ; GCN: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ liveins: $sgpr0, $vgpr0_vgpr1
+
+ %0:sreg_32 = COPY $sgpr0
+ %1:sreg_32 = S_AND_B32 1, killed %0, implicit-def dead $scc
+ S_CMP_GT_U32 killed %1:sreg_32, 1, implicit-def $scc
+ S_CBRANCH_SCC0 %bb.2, implicit $scc
+ S_BRANCH %bb.1
+
+ bb.1:
+ successors: %bb.2(0x80000000)
+
+ bb.2:
+ S_ENDPGM 0
+
+...
More information about the llvm-commits
mailing list