[llvm] cc76da7 - [GlobalISel] Rewrite the elide-br-by-swapping-icmp-ops combine to do less.
Amara Emerson via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 9 13:16:22 PDT 2020
Author: Amara Emerson
Date: 2020-09-09T13:08:16-07:00
New Revision: cc76da7adab71f0b6559ea13069f899b2ecbf70c
URL: https://github.com/llvm/llvm-project/commit/cc76da7adab71f0b6559ea13069f899b2ecbf70c
DIFF: https://github.com/llvm/llvm-project/commit/cc76da7adab71f0b6559ea13069f899b2ecbf70c.diff
LOG: [GlobalISel] Rewrite the elide-br-by-swapping-icmp-ops combine to do less.
This combine previously tried to take sequences like:
  %cond = G_ICMP pred, a, b
  G_BRCOND %cond, %truebb
  G_BR %falsebb
%truebb:
  ...
%falsebb:
  ...
and, by inverting the compare predicate and swapping the branch targets, delete
the G_BR, leaving a single conditional branch to %falsebb. Since an earlier
patch added a combine to fold not(icmp) into just an inverted icmp, this
combine no longer needs to do as much. This patch instead generalizes the
combine by just looking for:
  G_BRCOND %cond, %truebb
  G_BR %falsebb
%truebb:
  ...
%falsebb:
  ...
and then inverting the condition using a not (xor). The xor can be folded away
in a separate combine. This change also lets us avoid some optimization code
in the IRTranslator.
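For example, the rewrite looks like this (a minimal sketch; the register and
block names are illustrative, and the shape matches the updated
prelegalizercombiner-br.mir test below):

  G_BRCOND %cond(s1), %truebb
  G_BR %falsebb

becomes

  %true:_(s1) = G_CONSTANT i1 true
  %inv:_(s1) = G_XOR %cond, %true
  G_BRCOND %inv(s1), %falsebb
  G_BR %truebb

Since %truebb is the layout successor, the remaining G_BR is effectively a
fallthrough, and when %cond is defined by a G_ICMP the separate not_cmp_fold
combine can rewrite the G_XOR into a G_ICMP with the inverse predicate.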
I also think that deleting G_BRs in the combiner is unnecessary. That's
something that targets can decide to do at selection time and could simplify
generic code in the future.
Differential Revision: https://reviews.llvm.org/D86664
Added:
Modified:
llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
llvm/include/llvm/CodeGen/GlobalISel/Utils.h
llvm/include/llvm/Target/GlobalISel/Combine.td
llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
llvm/lib/CodeGen/GlobalISel/Utils.cpp
llvm/lib/Target/AArch64/AArch64Combine.td
llvm/lib/Target/AMDGPU/AMDGPUCombine.td
llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-br.mir
llvm/test/CodeGen/AArch64/GlobalISel/select-constant.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/bool-legalization.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
Removed:
llvm/test/CodeGen/AArch64/GlobalISel/const-0.ll
################################################################################
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index cff6b496cca2..745522d6b98e 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -147,9 +147,10 @@ class CombinerHelper {
bool matchSextInRegOfLoad(MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo);
bool applySextInRegOfLoad(MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo);
- bool matchElideBrByInvertingCond(MachineInstr &MI);
- void applyElideBrByInvertingCond(MachineInstr &MI);
- bool tryElideBrByInvertingCond(MachineInstr &MI);
+ /// If a brcond's true block is not the fallthrough, make it so by inverting
+ /// the condition and swapping operands.
+ bool matchOptBrCondByInvertingCond(MachineInstr &MI);
+ void applyOptBrCondByInvertingCond(MachineInstr &MI);
/// If \p MI is G_CONCAT_VECTORS, try to combine it.
/// Returns true if MI changed.
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
index 50534860bec1..a230f5adfe88 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
@@ -245,5 +245,9 @@ bool isBuildVectorAllOnes(const MachineInstr &MI,
/// the value \p Val contains a true value.
bool isConstTrueVal(const TargetLowering &TLI, int64_t Val, bool IsVector,
bool IsFP);
+
+/// Returns an integer representing true, as defined by the
+/// TargetBooleanContents.
+int64_t getICmpTrueVal(const TargetLowering &TLI, bool IsVector, bool IsFP);
} // End namespace llvm.
#endif
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 5b940551dad5..4d038ad7b240 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -145,13 +145,11 @@ def combine_indexed_load_store : GICombineRule<
[{ return Helper.matchCombineIndexedLoadStore(*${root}, ${matchinfo}); }]),
(apply [{ Helper.applyCombineIndexedLoadStore(*${root}, ${matchinfo}); }])>;
-// FIXME: Is there a reason this wasn't in tryCombine? I've left it out of
-// all_combines because it wasn't there.
-def elide_br_by_inverting_cond : GICombineRule<
+def opt_brcond_by_inverting_cond : GICombineRule<
(defs root:$root),
(match (wip_match_opcode G_BR):$root,
- [{ return Helper.matchElideBrByInvertingCond(*${root}); }]),
- (apply [{ Helper.applyElideBrByInvertingCond(*${root}); }])>;
+ [{ return Helper.matchOptBrCondByInvertingCond(*${root}); }]),
+ (apply [{ Helper.applyOptBrCondByInvertingCond(*${root}); }])>;
def ptr_add_immed_matchdata : GIDefMatchData<"PtrAddChain">;
def ptr_add_immed_chain : GICombineRule<
@@ -416,4 +414,4 @@ def all_combines : GICombineGroup<[trivial_combines, ptr_add_immed_chain,
shl_ashr_to_sext_inreg, sext_inreg_of_load,
width_reduction_combines, select_combines,
known_bits_simplifications, ext_ext_fold,
- not_cmp_fold]>;
+ not_cmp_fold, opt_brcond_by_inverting_cond]>;
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index d58ba7cf5a8c..356f08471109 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -881,14 +881,12 @@ void CombinerHelper::applyCombineIndexedLoadStore(
LLVM_DEBUG(dbgs() << " Combinined to indexed operation");
}
-bool CombinerHelper::matchElideBrByInvertingCond(MachineInstr &MI) {
+bool CombinerHelper::matchOptBrCondByInvertingCond(MachineInstr &MI) {
if (MI.getOpcode() != TargetOpcode::G_BR)
return false;
// Try to match the following:
// bb1:
- // %c(s32) = G_ICMP pred, %a, %b
- // %c1(s1) = G_TRUNC %c(s32)
// G_BRCOND %c1, %bb2
// G_BR %bb3
// bb2:
@@ -898,7 +896,7 @@ bool CombinerHelper::matchElideBrByInvertingCond(MachineInstr &MI) {
// The above pattern does not have a fall through to the successor bb2, always
// resulting in a branch no matter which path is taken. Here we try to find
// and replace that pattern with conditional branch to bb3 and otherwise
- // fallthrough to bb2.
+ // fallthrough to bb2. This is generally better for branch predictors.
MachineBasicBlock *MBB = MI.getParent();
MachineBasicBlock::iterator BrIt(MI);
@@ -913,40 +911,34 @@ bool CombinerHelper::matchElideBrByInvertingCond(MachineInstr &MI) {
// Check that the next block is the conditional branch target.
if (!MBB->isLayoutSuccessor(BrCond->getOperand(1).getMBB()))
return false;
-
- MachineInstr *CmpMI = MRI.getVRegDef(BrCond->getOperand(0).getReg());
- if (!CmpMI || CmpMI->getOpcode() != TargetOpcode::G_ICMP ||
- !MRI.hasOneNonDBGUse(CmpMI->getOperand(0).getReg()))
- return false;
return true;
}
-bool CombinerHelper::tryElideBrByInvertingCond(MachineInstr &MI) {
- if (!matchElideBrByInvertingCond(MI))
- return false;
- applyElideBrByInvertingCond(MI);
- return true;
-}
-
-void CombinerHelper::applyElideBrByInvertingCond(MachineInstr &MI) {
+void CombinerHelper::applyOptBrCondByInvertingCond(MachineInstr &MI) {
MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB();
MachineBasicBlock::iterator BrIt(MI);
MachineInstr *BrCond = &*std::prev(BrIt);
- MachineInstr *CmpMI = MRI.getVRegDef(BrCond->getOperand(0).getReg());
- CmpInst::Predicate InversePred = CmpInst::getInversePredicate(
- (CmpInst::Predicate)CmpMI->getOperand(1).getPredicate());
+ Builder.setInstrAndDebugLoc(*BrCond);
+ LLT Ty = MRI.getType(BrCond->getOperand(0).getReg());
+ // FIXME: Does int/fp matter for this? If so, we might need to restrict
+ // this to i1 only since we might not know for sure what kind of
+ // compare generated the condition value.
+ auto True = Builder.buildConstant(
+ Ty, getICmpTrueVal(getTargetLowering(), false, false));
+ auto Xor = Builder.buildXor(Ty, BrCond->getOperand(0), True);
- // Invert the G_ICMP condition.
- Observer.changingInstr(*CmpMI);
- CmpMI->getOperand(1).setPredicate(InversePred);
- Observer.changedInstr(*CmpMI);
+ auto *FallthroughBB = BrCond->getOperand(1).getMBB();
+ Observer.changingInstr(MI);
+ MI.getOperand(0).setMBB(FallthroughBB);
+ Observer.changedInstr(MI);
- // Change the conditional branch target.
+ // Change the conditional branch to use the inverted condition and
+ // new target block.
Observer.changingInstr(*BrCond);
+ BrCond->getOperand(0).setReg(Xor.getReg(0));
BrCond->getOperand(1).setMBB(BrTarget);
Observer.changedInstr(*BrCond);
- MI.eraseFromParent();
}
static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 6f8d233043e7..53e6eff2590e 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -740,3 +740,15 @@ bool llvm::isConstTrueVal(const TargetLowering &TLI, int64_t Val, bool IsVector,
}
llvm_unreachable("Invalid boolean contents");
}
+
+int64_t llvm::getICmpTrueVal(const TargetLowering &TLI, bool IsVector,
+ bool IsFP) {
+ switch (TLI.getBooleanContents(IsVector, IsFP)) {
+ case TargetLowering::UndefinedBooleanContent:
+ case TargetLowering::ZeroOrOneBooleanContent:
+ return 1;
+ case TargetLowering::ZeroOrNegativeOneBooleanContent:
+ return -1;
+ }
+ llvm_unreachable("Invalid boolean contents");
+}
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index 5fa44606488b..2187b6121421 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -19,7 +19,6 @@ def fconstant_to_constant : GICombineRule<
def AArch64PreLegalizerCombinerHelper: GICombinerHelper<
"AArch64GenPreLegalizerCombinerHelper", [all_combines,
- elide_br_by_inverting_cond,
fconstant_to_constant]> {
let DisableRuleOption = "aarch64prelegalizercombiner-disable-rule";
let StateClass = "AArch64PreLegalizerCombinerHelperState";
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
index d243074aa2fd..d34345e79fa6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
@@ -42,8 +42,7 @@ def gfx6gfx7_combines : GICombineGroup<[fcmp_select_to_fmin_fmax_legacy]>;
def AMDGPUPreLegalizerCombinerHelper: GICombinerHelper<
- "AMDGPUGenPreLegalizerCombinerHelper", [all_combines,
- elide_br_by_inverting_cond]> {
+ "AMDGPUGenPreLegalizerCombinerHelper", [all_combines]> {
let DisableRuleOption = "amdgpuprelegalizercombiner-disable-rule";
}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/const-0.ll b/llvm/test/CodeGen/AArch64/GlobalISel/const-0.ll
deleted file mode 100644
index 89d1ee29b959..000000000000
--- a/llvm/test/CodeGen/AArch64/GlobalISel/const-0.ll
+++ /dev/null
@@ -1,25 +0,0 @@
-; RUN: llc -mtriple=aarch64-linux-gnu -global-isel -O0 -o - %s | FileCheck %s
-
-%struct.comp = type { i8*, i32, i8*, [3 x i8], i32 }
-
-define void @regbranch() {
-; CHECK-LABEL: regbranch:
-; CHECK: mov {{w[0-9]+}}, #0
-cond_next240.i:
- br i1 false, label %cond_true251.i, label %cond_next272.i
-
-cond_true251.i:
- switch i8 0, label %cond_next272.i [
- i8 42, label %bb268.i
- i8 43, label %bb268.i
- i8 63, label %bb268.i
- ]
-
-bb268.i:
- br label %cond_next272.i
-
-cond_next272.i:
- %len.2.i = phi i32 [ 0, %bb268.i ], [ 0, %cond_next240.i ], [ 0, %cond_true251.i ]
- %tmp278.i = icmp eq i32 %len.2.i, 1
- ret void
-}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-br.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-br.mir
index 051f33dabf4c..6ed879d82b9b 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-br.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-br.mir
@@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -debugify-and-strip-all-safe -O0 -run-pass=aarch64-prelegalizer-combiner -global-isel -verify-machineinstrs %s -o - | FileCheck %s
+# RUN: llc -debugify-and-strip-all-safe -O0 -run-pass=aarch64-prelegalizer-combiner --aarch64prelegalizercombinerhelper-only-enable-rule="opt_brcond_by_inverting_cond" -global-isel -verify-machineinstrs %s -o - | FileCheck %s
--- |
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
target triple = "arm64-apple-ios5.0.0"
@@ -38,8 +38,11 @@ body: |
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
- ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sle), [[COPY]](s32), [[C]]
- ; CHECK: G_BRCOND [[ICMP]](s1), %bb.2
+ ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY]](s32), [[C]]
+ ; CHECK: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+ ; CHECK: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP]], [[C2]]
+ ; CHECK: G_BRCOND [[XOR]](s1), %bb.2
+ ; CHECK: G_BR %bb.1
; CHECK: bb.1.if.then:
; CHECK: successors: %bb.3(0x80000000)
; CHECK: [[ADD:%[0-9]+]]:_(s32) = nsw G_ADD [[COPY1]], [[COPY]]
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-constant.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-constant.mir
index e25c84958b9d..c280f000b174 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-constant.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-constant.mir
@@ -8,6 +8,8 @@
define i16 @const_s16() { ret i16 42 }
define i32 @const_s32() { ret i32 42 }
define i64 @const_s64() { ret i64 1234567890123 }
+ define i32 @const_s32_zero() { ret i32 0 }
+ define i64 @const_s64_zero() { ret i64 0 }
define i8* @const_p0_0() { ret i8* null }
define i32 @fconst_s32() { ret i32 42 }
@@ -81,6 +83,38 @@ body: |
$x0 = COPY %0(s64)
...
+---
+name: const_s32_zero
+legalized: true
+regBankSelected: true
+registers:
+ - { id: 0, class: gpr }
+
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: const_s32_zero
+ ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $wzr
+ ; CHECK: $w0 = COPY [[COPY]]
+ %0(s32) = G_CONSTANT i32 0
+ $w0 = COPY %0(s32)
+...
+
+---
+name: const_s64_zero
+legalized: true
+regBankSelected: true
+registers:
+ - { id: 0, class: gpr }
+
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: const_s64_zero
+ ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $xzr
+ ; CHECK: $x0 = COPY [[COPY]]
+ %0(s64) = G_CONSTANT i64 0
+ $x0 = COPY %0(s64)
+...
+
---
name: const_p0_0
legalized: true
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/bool-legalization.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/bool-legalization.ll
index eebfbee8a12e..cb6822bcf1ba 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/bool-legalization.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/bool-legalization.ll
@@ -52,9 +52,10 @@ define amdgpu_kernel void @sgpr_trunc_brcond(i32 %cond) {
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_load_dword s0, s[0:1], 0x9
; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_xor_b32 s0, s0, -1
; GCN-NEXT: s_and_b32 s0, s0, 1
; GCN-NEXT: s_cmp_lg_u32 s0, 0
-; GCN-NEXT: s_cbranch_scc0 BB3_2
+; GCN-NEXT: s_cbranch_scc1 BB3_2
; GCN-NEXT: ; %bb.1: ; %bb0
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: flat_store_dword v[0:1], v0
@@ -80,9 +81,10 @@ define amdgpu_kernel void @brcond_sgpr_trunc_and(i32 %cond0, i32 %cond1) {
; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_and_b32 s0, s0, s1
+; GCN-NEXT: s_xor_b32 s0, s0, -1
; GCN-NEXT: s_and_b32 s0, s0, 1
; GCN-NEXT: s_cmp_lg_u32 s0, 0
-; GCN-NEXT: s_cbranch_scc0 BB4_2
+; GCN-NEXT: s_cbranch_scc1 BB4_2
; GCN-NEXT: ; %bb.1: ; %bb0
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: flat_store_dword v[0:1], v0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll
index 88c82b1c3f7c..e25fd7fc43fc 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll
@@ -51,11 +51,11 @@ define amdgpu_kernel void @is_private_sgpr(i8* %ptr) {
; CI-NEXT: s_waitcnt lgkmcnt(0)
; CI-NEXT: s_load_dword s0, s[4:5], 0x11
; CI-NEXT: s_waitcnt lgkmcnt(0)
-; CI-NEXT: s_cmp_eq_u32 s1, s0
+; CI-NEXT: s_cmp_lg_u32 s1, s0
; CI-NEXT: s_cselect_b32 s0, 1, 0
; CI-NEXT: s_and_b32 s0, s0, 1
; CI-NEXT: s_cmp_lg_u32 s0, 0
-; CI-NEXT: s_cbranch_scc0 BB1_2
+; CI-NEXT: s_cbranch_scc1 BB1_2
; CI-NEXT: ; %bb.1: ; %bb0
; CI-NEXT: v_mov_b32_e32 v0, 0
; CI-NEXT: flat_store_dword v[0:1], v0
@@ -68,11 +68,11 @@ define amdgpu_kernel void @is_private_sgpr(i8* %ptr) {
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 0, 16)
; GFX9-NEXT: s_lshl_b32 s0, s0, 16
-; GFX9-NEXT: s_cmp_eq_u32 s1, s0
+; GFX9-NEXT: s_cmp_lg_u32 s1, s0
; GFX9-NEXT: s_cselect_b32 s0, 1, 0
; GFX9-NEXT: s_and_b32 s0, s0, 1
; GFX9-NEXT: s_cmp_lg_u32 s0, 0
-; GFX9-NEXT: s_cbranch_scc0 BB1_2
+; GFX9-NEXT: s_cbranch_scc1 BB1_2
; GFX9-NEXT: ; %bb.1: ; %bb0
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: global_store_dword v[0:1], v0, off
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll
index ec477c9925c9..356f219ba0c2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll
@@ -51,11 +51,11 @@ define amdgpu_kernel void @is_local_sgpr(i8* %ptr) {
; CI-NEXT: s_waitcnt lgkmcnt(0)
; CI-NEXT: s_load_dword s0, s[4:5], 0x10
; CI-NEXT: s_waitcnt lgkmcnt(0)
-; CI-NEXT: s_cmp_eq_u32 s1, s0
+; CI-NEXT: s_cmp_lg_u32 s1, s0
; CI-NEXT: s_cselect_b32 s0, 1, 0
; CI-NEXT: s_and_b32 s0, s0, 1
; CI-NEXT: s_cmp_lg_u32 s0, 0
-; CI-NEXT: s_cbranch_scc0 BB1_2
+; CI-NEXT: s_cbranch_scc1 BB1_2
; CI-NEXT: ; %bb.1: ; %bb0
; CI-NEXT: v_mov_b32_e32 v0, 0
; CI-NEXT: flat_store_dword v[0:1], v0
@@ -68,11 +68,11 @@ define amdgpu_kernel void @is_local_sgpr(i8* %ptr) {
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 16, 16)
; GFX9-NEXT: s_lshl_b32 s0, s0, 16
-; GFX9-NEXT: s_cmp_eq_u32 s1, s0
+; GFX9-NEXT: s_cmp_lg_u32 s1, s0
; GFX9-NEXT: s_cselect_b32 s0, 1, 0
; GFX9-NEXT: s_and_b32 s0, s0, 1
; GFX9-NEXT: s_cmp_lg_u32 s0, 0
-; GFX9-NEXT: s_cbranch_scc0 BB1_2
+; GFX9-NEXT: s_cbranch_scc1 BB1_2
; GFX9-NEXT: ; %bb.1: ; %bb0
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: global_store_dword v[0:1], v0, off
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll
index 3c550a1a08e1..5f4d4097b23a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll
@@ -29,9 +29,10 @@ define amdgpu_kernel void @localize_constants(i1 %cond) {
; GFX9-NEXT: s_mov_b32 s0, 0
; GFX9-NEXT: global_store_dword v[0:1], v0, off
; GFX9-NEXT: BB0_2: ; %Flow
+; GFX9-NEXT: s_xor_b32 s0, s0, -1
; GFX9-NEXT: s_and_b32 s0, s0, 1
; GFX9-NEXT: s_cmp_lg_u32 s0, 0
-; GFX9-NEXT: s_cbranch_scc0 BB0_4
+; GFX9-NEXT: s_cbranch_scc1 BB0_4
; GFX9-NEXT: ; %bb.3: ; %bb0
; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b
; GFX9-NEXT: global_store_dword v[0:1], v0, off
@@ -109,9 +110,10 @@ define amdgpu_kernel void @localize_globals(i1 %cond) {
; GFX9-NEXT: v_mov_b32_e32 v1, s5
; GFX9-NEXT: global_store_dword v[0:1], v2, off
; GFX9-NEXT: BB1_2: ; %Flow
+; GFX9-NEXT: s_xor_b32 s0, s0, -1
; GFX9-NEXT: s_and_b32 s0, s0, 1
; GFX9-NEXT: s_cmp_lg_u32 s0, 0
-; GFX9-NEXT: s_cbranch_scc0 BB1_4
+; GFX9-NEXT: s_cbranch_scc1 BB1_4
; GFX9-NEXT: ; %bb.3: ; %bb0
; GFX9-NEXT: s_getpc_b64 s[0:1]
; GFX9-NEXT: s_add_u32 s0, s0, gv0@gotpcrel32@lo+4
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
index d2e7328a384f..9e2f881ee8df 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
@@ -357,9 +357,10 @@ define amdgpu_ps i64 @s_sdiv_i64(i64 inreg %num, i64 inreg %den) {
; CHECK-NEXT: BB1_2:
; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
; CHECK-NEXT: BB1_3: ; %Flow
-; CHECK-NEXT: s_and_b32 s0, s1, 1
+; CHECK-NEXT: s_xor_b32 s0, s1, -1
+; CHECK-NEXT: s_and_b32 s0, s0, 1
; CHECK-NEXT: s_cmp_lg_u32 s0, 0
-; CHECK-NEXT: s_cbranch_scc0 BB1_5
+; CHECK-NEXT: s_cbranch_scc1 BB1_5
; CHECK-NEXT: ; %bb.4:
; CHECK-NEXT: v_cvt_f32_u32_e32 v0, s4
; CHECK-NEXT: s_sub_i32 s0, 0, s4
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
index cbb77b54aba5..2217e17358b3 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
@@ -351,9 +351,10 @@ define amdgpu_ps i64 @s_srem_i64(i64 inreg %num, i64 inreg %den) {
; CHECK-NEXT: BB1_2:
; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
; CHECK-NEXT: BB1_3: ; %Flow
-; CHECK-NEXT: s_and_b32 s0, s1, 1
+; CHECK-NEXT: s_xor_b32 s0, s1, -1
+; CHECK-NEXT: s_and_b32 s0, s0, 1
; CHECK-NEXT: s_cmp_lg_u32 s0, 0
-; CHECK-NEXT: s_cbranch_scc0 BB1_5
+; CHECK-NEXT: s_cbranch_scc1 BB1_5
; CHECK-NEXT: ; %bb.4:
; CHECK-NEXT: v_cvt_f32_u32_e32 v0, s4
; CHECK-NEXT: s_sub_i32 s0, 0, s4
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
index 559d116602e5..402ae90219eb 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
@@ -323,9 +323,10 @@ define amdgpu_ps i64 @s_udiv_i64(i64 inreg %num, i64 inreg %den) {
; CHECK-NEXT: BB1_2:
; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
; CHECK-NEXT: BB1_3: ; %Flow
-; CHECK-NEXT: s_and_b32 s1, s5, 1
+; CHECK-NEXT: s_xor_b32 s1, s5, -1
+; CHECK-NEXT: s_and_b32 s1, s1, 1
; CHECK-NEXT: s_cmp_lg_u32 s1, 0
-; CHECK-NEXT: s_cbranch_scc0 BB1_5
+; CHECK-NEXT: s_cbranch_scc1 BB1_5
; CHECK-NEXT: ; %bb.4:
; CHECK-NEXT: v_cvt_f32_u32_e32 v0, s2
; CHECK-NEXT: s_sub_i32 s1, 0, s2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
index 92f93185530f..348f38ef250e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
@@ -319,9 +319,10 @@ define amdgpu_ps i64 @s_urem_i64(i64 inreg %num, i64 inreg %den) {
; CHECK-NEXT: BB1_2:
; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
; CHECK-NEXT: BB1_3: ; %Flow
-; CHECK-NEXT: s_and_b32 s1, s5, 1
+; CHECK-NEXT: s_xor_b32 s1, s5, -1
+; CHECK-NEXT: s_and_b32 s1, s1, 1
; CHECK-NEXT: s_cmp_lg_u32 s1, 0
-; CHECK-NEXT: s_cbranch_scc0 BB1_5
+; CHECK-NEXT: s_cbranch_scc1 BB1_5
; CHECK-NEXT: ; %bb.4:
; CHECK-NEXT: v_cvt_f32_u32_e32 v0, s2
; CHECK-NEXT: s_sub_i32 s1, 0, s2