[llvm-branch-commits] [llvm] [GISel] Combine compare of bitfield extracts or'd together. (PR #146055)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Jun 27 03:48:09 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-globalisel
Author: Pierre van Houtryve (Pierre-vh)
<details>
<summary>Changes</summary>
Equivalent of the previous DAG patch for GISel.
The shifts are BFXs in GISel, so the canonical form of the entire expression
is different from the one in the DAG. The mask is not at the root of the
expression; it remains on the leaves instead.
See #<!-- -->136727
---
Patch is 25.96 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/146055.diff
5 Files Affected:
- (modified) llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h (+2)
- (modified) llvm/include/llvm/Target/GlobalISel/Combine.td (+10-1)
- (modified) llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp (+89)
- (added) llvm/test/CodeGen/AMDGPU/GlobalISel/combine-cmp-merged-bfx.mir (+326)
- (modified) llvm/test/CodeGen/AMDGPU/workitems-intrinsics-opts.ll (+56-138)
``````````diff
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index c15263e0b06f8..5ec82c30f268f 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -641,6 +641,8 @@ class CombinerHelper {
/// KnownBits information.
bool matchICmpToLHSKnownBits(MachineInstr &MI, BuildFnTy &MatchInfo) const;
+ bool combineMergedBFXCompare(MachineInstr &MI) const;
+
/// \returns true if (and (or x, c1), c2) can be replaced with (and x, c2)
bool matchAndOrDisjointMask(MachineInstr &MI, BuildFnTy &MatchInfo) const;
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 4a92dc16c1bf4..cba46a5edf9ec 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1085,6 +1085,14 @@ def double_icmp_zero_or_combine: GICombineRule<
(G_ICMP $root, $p, $ordst, 0))
>;
+// Transform ((X | (G_UBFX X, ...) | ...) == 0) (or != 0)
+// into a compare of an extract/mask of X
+def icmp_merged_bfx_combine: GICombineRule<
+ (defs root:$root),
+ (combine (G_ICMP $dst, $p, $src, 0):$root,
+ [{ return Helper.combineMergedBFXCompare(*${root}); }])
+>;
+
def and_or_disjoint_mask : GICombineRule<
(defs root:$root, build_fn_matchinfo:$info),
(match (wip_match_opcode G_AND):$root,
@@ -2052,7 +2060,8 @@ def all_combines : GICombineGroup<[integer_reassoc_combines, trivial_combines,
fsub_to_fneg, commute_constant_to_rhs, match_ands, match_ors,
simplify_neg_minmax, combine_concat_vector,
sext_trunc, zext_trunc, prefer_sign_combines, shuffle_combines,
- combine_use_vector_truncate, merge_combines, overflow_combines]>;
+ combine_use_vector_truncate, merge_combines, overflow_combines,
+ icmp_merged_bfx_combine]>;
// A combine group used to for prelegalizer combiners at -O0. The combines in
// this group have been selected based on experiments to balance code size and
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp
index fc40533cf3dc9..e1d43f37bac13 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp
@@ -140,3 +140,92 @@ bool CombinerHelper::matchCanonicalizeFCmp(const MachineInstr &MI,
return false;
}
+
+bool CombinerHelper::combineMergedBFXCompare(MachineInstr &MI) const {
+ const GICmp *Cmp = cast<GICmp>(&MI);
+
+ ICmpInst::Predicate CC = Cmp->getCond();
+ if (CC != CmpInst::ICMP_EQ && CC != CmpInst::ICMP_NE)
+ return false;
+
+ Register CmpLHS = Cmp->getLHSReg();
+ Register CmpRHS = Cmp->getRHSReg();
+
+ LLT OpTy = MRI.getType(CmpLHS);
+ if (!OpTy.isScalar() || OpTy.isPointer())
+ return false;
+
+ assert(isZeroOrZeroSplat(CmpRHS, /*AllowUndefs=*/false));
+
+ Register Src;
+ const auto IsSrc = [&](Register R) {
+ if (!Src) {
+ Src = R;
+ return true;
+ }
+
+ return Src == R;
+ };
+
+ MachineInstr *CmpLHSDef = MRI.getVRegDef(CmpLHS);
+ if (CmpLHSDef->getOpcode() != TargetOpcode::G_OR)
+ return false;
+
+ APInt PartsMask(OpTy.getSizeInBits(), 0);
+ SmallVector<MachineInstr *> Worklist = {CmpLHSDef};
+ while (!Worklist.empty()) {
+ MachineInstr *Cur = Worklist.pop_back_val();
+
+ Register Dst = Cur->getOperand(0).getReg();
+ if (!MRI.hasOneUse(Dst) && Dst != Src)
+ return false;
+
+ if (Cur->getOpcode() == TargetOpcode::G_OR) {
+ Worklist.push_back(MRI.getVRegDef(Cur->getOperand(1).getReg()));
+ Worklist.push_back(MRI.getVRegDef(Cur->getOperand(2).getReg()));
+ continue;
+ }
+
+ if (Cur->getOpcode() == TargetOpcode::G_UBFX) {
+ Register Op = Cur->getOperand(1).getReg();
+ Register Width = Cur->getOperand(2).getReg();
+ Register Off = Cur->getOperand(3).getReg();
+
+ auto WidthCst = getIConstantVRegVal(Width, MRI);
+ auto OffCst = getIConstantVRegVal(Off, MRI);
+ if (!WidthCst || !OffCst || !IsSrc(Op))
+ return false;
+
+ unsigned Start = OffCst->getZExtValue();
+ unsigned End = Start + WidthCst->getZExtValue();
+ if (End > OpTy.getScalarSizeInBits())
+ return false;
+ PartsMask.setBits(Start, End);
+ continue;
+ }
+
+ if (Cur->getOpcode() == TargetOpcode::G_AND) {
+ Register LHS = Cur->getOperand(1).getReg();
+ Register RHS = Cur->getOperand(2).getReg();
+
+ auto MaskCst = getIConstantVRegVal(RHS, MRI);
+ if (!MaskCst || !MaskCst->isMask() || !IsSrc(LHS))
+ return false;
+
+ PartsMask |= *MaskCst;
+ continue;
+ }
+
+ return false;
+ }
+
+ if (!PartsMask.isMask() || !Src)
+ return false;
+
+ assert(OpTy == MRI.getType(Src) && "Ignored a type casting operation?");
+ auto MaskedSrc =
+ Builder.buildAnd(OpTy, Src, Builder.buildConstant(OpTy, PartsMask));
+ Builder.buildICmp(CC, Cmp->getReg(0), MaskedSrc, CmpRHS, Cmp->getFlags());
+ MI.eraseFromParent();
+ return true;
+}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-cmp-merged-bfx.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-cmp-merged-bfx.mir
new file mode 100644
index 0000000000000..b96a6772010ed
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-cmp-merged-bfx.mir
@@ -0,0 +1,326 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass=amdgpu-postlegalizer-combiner %s -o - | FileCheck %s
+
+---
+name: basic_i64_2x5
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; CHECK-LABEL: name: basic_i64_2x5
+ ; CHECK: liveins: $vgpr0_vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %reg:_(s64) = COPY $vgpr0_vgpr1
+ ; CHECK-NEXT: %zero:_(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1023
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND %reg, [[C]]
+ ; CHECK-NEXT: %cmp:_(s1) = G_ICMP intpred(eq), [[AND]](s64), %zero
+ ; CHECK-NEXT: %res:_(s32) = G_ZEXT %cmp(s1)
+ ; CHECK-NEXT: $vgpr0 = COPY %res(s32)
+ %reg:_(s64) = COPY $vgpr0_vgpr1
+ %mask:_(s64) = G_CONSTANT i64 31
+ %reg_mask:_(s64) = G_AND %reg, %mask
+ %k:_(s64) = G_CONSTANT i64 5
+ %bfx:_(s64) = G_UBFX %reg, %k, %k
+ %x:_(s64) = G_OR %reg_mask, %bfx
+ %zero:_(s64) = G_CONSTANT i64 0
+ %cmp:_(s1) = G_ICMP intpred(eq), %x, %zero
+ %res:_(s32) = G_ZEXT %cmp
+ $vgpr0 = COPY %res
+...
+
+---
+name: basic_i32_2x5
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; CHECK-LABEL: name: basic_i32_2x5
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1023
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND %reg, [[C]]
+ ; CHECK-NEXT: %cmp:_(s1) = G_ICMP intpred(eq), [[AND]](s32), %zero
+ ; CHECK-NEXT: %res:_(s32) = G_ZEXT %cmp(s1)
+ ; CHECK-NEXT: $vgpr0 = COPY %res(s32)
+ %reg:_(s32) = COPY $vgpr0
+ %mask:_(s32) = G_CONSTANT i32 31
+ %reg_mask:_(s32) = G_AND %reg, %mask
+ %k:_(s32) = G_CONSTANT i32 5
+ %bfx:_(s32) = G_UBFX %reg, %k, %k
+ %x:_(s32) = G_OR %reg_mask, %bfx
+ %zero:_(s32) = G_CONSTANT i32 0
+ %cmp:_(s1) = G_ICMP intpred(eq), %x, %zero
+ %res:_(s32) = G_ZEXT %cmp
+ $vgpr0 = COPY %res
+...
+
+---
+name: basic_ne_i32_2x5
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; CHECK-LABEL: name: basic_ne_i32_2x5
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1023
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND %reg, [[C]]
+ ; CHECK-NEXT: %cmp:_(s1) = G_ICMP intpred(ne), [[AND]](s32), %zero
+ ; CHECK-NEXT: %res:_(s32) = G_ZEXT %cmp(s1)
+ ; CHECK-NEXT: $vgpr0 = COPY %res(s32)
+ %reg:_(s32) = COPY $vgpr0
+ %mask:_(s32) = G_CONSTANT i32 31
+ %reg_mask:_(s32) = G_AND %reg, %mask
+ %k:_(s32) = G_CONSTANT i32 5
+ %bfx:_(s32) = G_UBFX %reg, %k, %k
+ %x:_(s32) = G_OR %reg_mask, %bfx
+ %zero:_(s32) = G_CONSTANT i32 0
+ %cmp:_(s1) = G_ICMP intpred(ne), %x, %zero
+ %res:_(s32) = G_ZEXT %cmp
+ $vgpr0 = COPY %res
+...
+
+---
+name: basic_i32_5x5
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; CHECK-LABEL: name: basic_i32_5x5
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 33554431
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND %reg, [[C]]
+ ; CHECK-NEXT: %cmp:_(s1) = G_ICMP intpred(eq), [[AND]](s32), %zero
+ ; CHECK-NEXT: %res:_(s32) = G_ZEXT %cmp(s1)
+ ; CHECK-NEXT: $vgpr0 = COPY %res(s32)
+ %reg:_(s32) = COPY $vgpr0
+ %mask:_(s32) = G_CONSTANT i32 31
+ %reg_mask:_(s32) = G_AND %reg, %mask
+ %five:_(s32) = G_CONSTANT i32 5
+ %bfx1:_(s32) = G_UBFX %reg, %five, %five
+ %x1:_(s32) = G_OR %reg_mask, %bfx1
+ %k2:_(s32) = G_CONSTANT i32 10
+ %bfx2:_(s32) = G_UBFX %reg, %k2, %five
+ %x2:_(s32) = G_OR %x1, %bfx2
+ %k3:_(s32) = G_CONSTANT i32 15
+ %bfx3:_(s32) = G_UBFX %reg, %k3, %five
+ %x3:_(s32) = G_OR %x2, %bfx3
+ %k4:_(s32) = G_CONSTANT i32 20
+ %bfx4:_(s32) = G_UBFX %reg, %k4, %five
+ %x4:_(s32) = G_OR %x3, %bfx4
+ %zero:_(s32) = G_CONSTANT i32 0
+ %cmp:_(s1) = G_ICMP intpred(eq), %x4, %zero
+ %res:_(s32) = G_ZEXT %cmp
+ $vgpr0 = COPY %res
+...
+
+---
+name: basic_i16_2x5
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; CHECK-LABEL: name: basic_i16_2x5
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: %reg_trunc:_(s16) = G_TRUNC %reg(s32)
+ ; CHECK-NEXT: %zero:_(s16) = G_CONSTANT i16 0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1023
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND %reg_trunc, [[C]]
+ ; CHECK-NEXT: %cmp:_(s1) = G_ICMP intpred(eq), [[AND]](s16), %zero
+ ; CHECK-NEXT: %res:_(s32) = G_ZEXT %cmp(s1)
+ ; CHECK-NEXT: $vgpr0 = COPY %res(s32)
+ %reg:_(s32) = COPY $vgpr0
+ %reg_trunc:_(s16) = G_TRUNC %reg
+ %mask:_(s16) = G_CONSTANT i16 31
+ %reg_mask:_(s16) = G_AND %reg_trunc, %mask
+ %k:_(s16) = G_CONSTANT i16 5
+ %bfx:_(s16) = G_UBFX %reg_trunc, %k, %k
+ %x:_(s16) = G_OR %reg_mask, %bfx
+ %zero:_(s16) = G_CONSTANT i16 0
+ %cmp:_(s1) = G_ICMP intpred(eq), %x, %zero
+ %res:_(s32) = G_ZEXT %cmp
+ $vgpr0 = COPY %res
+...
+
+---
+name: unsupported_sbfx
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; CHECK-LABEL: name: unsupported_sbfx
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: %mask:_(s32) = G_CONSTANT i32 31
+ ; CHECK-NEXT: %reg_mask:_(s32) = G_AND %reg, %mask
+ ; CHECK-NEXT: %k:_(s32) = G_CONSTANT i32 5
+ ; CHECK-NEXT: %bfx:_(s32) = G_SBFX %reg, %k(s32), %k
+ ; CHECK-NEXT: %x:_(s32) = G_OR %reg_mask, %bfx
+ ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: %cmp:_(s1) = G_ICMP intpred(eq), %x(s32), %zero
+ ; CHECK-NEXT: %res:_(s32) = G_ZEXT %cmp(s1)
+ ; CHECK-NEXT: $vgpr0 = COPY %res(s32)
+ %reg:_(s32) = COPY $vgpr0
+ %mask:_(s32) = G_CONSTANT i32 31
+ %reg_mask:_(s32) = G_AND %reg, %mask
+ %k:_(s32) = G_CONSTANT i32 5
+ %bfx:_(s32) = G_SBFX %reg, %k, %k
+ %x:_(s32) = G_OR %reg_mask, %bfx
+ %zero:_(s32) = G_CONSTANT i32 0
+ %cmp:_(s1) = G_ICMP intpred(eq), %x, %zero
+ %res:_(s32) = G_ZEXT %cmp
+ $vgpr0 = COPY %res
+...
+
+---
+name: unsupported_src_changes
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+
+ ; CHECK-LABEL: name: unsupported_src_changes
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: %reg2:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: %mask:_(s32) = G_CONSTANT i32 31
+ ; CHECK-NEXT: %reg_mask:_(s32) = G_AND %reg2, %mask
+ ; CHECK-NEXT: %k:_(s32) = G_CONSTANT i32 5
+ ; CHECK-NEXT: %bfx:_(s32) = G_SBFX %reg, %k(s32), %k
+ ; CHECK-NEXT: %x:_(s32) = G_OR %reg_mask, %bfx
+ ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: %cmp:_(s1) = G_ICMP intpred(eq), %x(s32), %zero
+ ; CHECK-NEXT: %res:_(s32) = G_ZEXT %cmp(s1)
+ ; CHECK-NEXT: $vgpr0 = COPY %res(s32)
+ %reg:_(s32) = COPY $vgpr0
+ %reg2:_(s32) = COPY $vgpr1
+ %mask:_(s32) = G_CONSTANT i32 31
+ %reg_mask:_(s32) = G_AND %reg2, %mask
+ %k:_(s32) = G_CONSTANT i32 5
+ %bfx:_(s32) = G_SBFX %reg, %k, %k
+ %x:_(s32) = G_OR %reg_mask, %bfx
+ %zero:_(s32) = G_CONSTANT i32 0
+ %cmp:_(s1) = G_ICMP intpred(eq), %x, %zero
+ %res:_(s32) = G_ZEXT %cmp
+ $vgpr0 = COPY %res
+...
+
+---
+name: unsupported_holes_in_mask
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+
+ ; CHECK-LABEL: name: unsupported_holes_in_mask
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: %reg2:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: %mask:_(s32) = G_CONSTANT i32 31
+ ; CHECK-NEXT: %reg_mask:_(s32) = G_AND %reg2, %mask
+ ; CHECK-NEXT: %k:_(s32) = G_CONSTANT i32 6
+ ; CHECK-NEXT: %bfx:_(s32) = G_UBFX %reg, %k(s32), %k
+ ; CHECK-NEXT: %x:_(s32) = G_OR %reg_mask, %bfx
+ ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: %cmp:_(s1) = G_ICMP intpred(eq), %x(s32), %zero
+ ; CHECK-NEXT: %res:_(s32) = G_ZEXT %cmp(s1)
+ ; CHECK-NEXT: $vgpr0 = COPY %res(s32)
+ %reg:_(s32) = COPY $vgpr0
+ %reg2:_(s32) = COPY $vgpr1
+ %mask:_(s32) = G_CONSTANT i32 31
+ %reg_mask:_(s32) = G_AND %reg2, %mask
+ %k:_(s32) = G_CONSTANT i32 6
+ %bfx:_(s32) = G_UBFX %reg, %k, %k
+ %x:_(s32) = G_OR %reg_mask, %bfx
+ %zero:_(s32) = G_CONSTANT i32 0
+ %cmp:_(s1) = G_ICMP intpred(eq), %x, %zero
+ %res:_(s32) = G_ZEXT %cmp
+ $vgpr0 = COPY %res
+...
+
+---
+name: unsupported_bfx_out_of_range
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+
+ ; CHECK-LABEL: name: unsupported_bfx_out_of_range
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: %reg2:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: %mask:_(s32) = G_CONSTANT i32 31
+ ; CHECK-NEXT: %reg_mask:_(s32) = G_AND %reg2, %mask
+ ; CHECK-NEXT: %width:_(s32) = G_CONSTANT i32 12
+ ; CHECK-NEXT: %off:_(s32) = G_CONSTANT i32 26
+ ; CHECK-NEXT: %bfx:_(s32) = G_UBFX %reg, %off(s32), %width
+ ; CHECK-NEXT: %x:_(s32) = G_OR %reg_mask, %bfx
+ ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: %cmp:_(s1) = G_ICMP intpred(eq), %x(s32), %zero
+ ; CHECK-NEXT: %res:_(s32) = G_ZEXT %cmp(s1)
+ ; CHECK-NEXT: $vgpr0 = COPY %res(s32)
+ %reg:_(s32) = COPY $vgpr0
+ %reg2:_(s32) = COPY $vgpr1
+ %mask:_(s32) = G_CONSTANT i32 31
+ %reg_mask:_(s32) = G_AND %reg2, %mask
+ %width:_(s32) = G_CONSTANT i32 12
+ %off:_(s32) = G_CONSTANT i32 26
+ %bfx:_(s32) = G_UBFX %reg, %off, %width
+ %x:_(s32) = G_OR %reg_mask, %bfx
+ %zero:_(s32) = G_CONSTANT i32 0
+ %cmp:_(s1) = G_ICMP intpred(eq), %x, %zero
+ %res:_(s32) = G_ZEXT %cmp
+ $vgpr0 = COPY %res
+...
+
+---
+name: unsupported_cc
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; CHECK-LABEL: name: unsupported_cc
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: %mask:_(s32) = G_CONSTANT i32 31
+ ; CHECK-NEXT: %reg_mask:_(s32) = G_AND %reg, %mask
+ ; CHECK-NEXT: %k:_(s32) = G_CONSTANT i32 5
+ ; CHECK-NEXT: %bfx:_(s32) = G_UBFX %reg, %k(s32), %k
+ ; CHECK-NEXT: %x:_(s32) = G_OR %reg_mask, %bfx
+ ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: %cmp:_(s1) = G_ICMP intpred(ule), %x(s32), %zero
+ ; CHECK-NEXT: %res:_(s32) = G_ZEXT %cmp(s1)
+ ; CHECK-NEXT: $vgpr0 = COPY %res(s32)
+ %reg:_(s32) = COPY $vgpr0
+ %mask:_(s32) = G_CONSTANT i32 31
+ %reg_mask:_(s32) = G_AND %reg, %mask
+ %k:_(s32) = G_CONSTANT i32 5
+ %bfx:_(s32) = G_UBFX %reg, %k, %k
+ %x:_(s32) = G_OR %reg_mask, %bfx
+ %zero:_(s32) = G_CONSTANT i32 0
+ %cmp:_(s1) = G_ICMP intpred(ule), %x, %zero
+ %res:_(s32) = G_ZEXT %cmp
+ $vgpr0 = COPY %res
+...
diff --git a/llvm/test/CodeGen/AMDGPU/workitems-intrinsics-opts.ll b/llvm/test/CodeGen/AMDGPU/workitems-intrinsics-opts.ll
index 5a25ec29af481..dd9684797184b 100644
--- a/llvm/test/CodeGen/AMDGPU/workitems-intrinsics-opts.ll
+++ b/llvm/test/CodeGen/AMDGPU/workitems-intrinsics-opts.ll
@@ -9,77 +9,36 @@
; (workitem_id_x | workitem_id_y | workitem_id_z) == 0
define i1 @workitem_zero() {
-; DAGISEL-GFX9-LABEL: workitem_zero:
-; DAGISEL-GFX9: ; %bb.0: ; %entry
-; DAGISEL-GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; DAGISEL-GFX9-NEXT: v_and_b32_e32 v0, 0x3fffffff, v31
-; DAGISEL-GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; DAGISEL-GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; DAGISEL-GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; DAGISEL-GFX942-LABEL: workitem_zero:
-; DAGISEL-GFX942: ; %bb.0: ; %entry
-; DAGISEL-GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; DAGISEL-GFX942-NEXT: v_and_b32_e32 v0, 0x3fffffff, v31
-; DAGISEL-GFX942-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; DAGISEL-GFX942-NEXT: s_nop 1
-; DAGISEL-GFX942-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; DAGISEL-GFX942-NEXT: s_setpc_b64 s[30:31]
-;
-; DAGISEL-GFX12-LABEL: workitem_zero:
-; DAGISEL-GFX12: ; %bb.0: ; %entry
-; DAGISEL-GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; DAGISEL-GFX12-NEXT: s_wait_expcnt 0x0
-; DAGISEL-GFX12-NEXT: s_wait_samplecnt 0x0
-; DAGISEL-GFX12-NEXT: s_wait_bvhcnt 0x0
-; DAGISEL-GFX12-NEXT: s_wait_kmcnt 0x0
-; DAGISEL-GFX12-NEXT: v_and_b32_e32 v0, 0x3fffffff, v31
-; DAGISEL-GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; DAGISEL-GFX12-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; DAGISEL-GFX12-NEXT: s_wait_alu 0xfffd
-; DAGISEL-GFX12-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; DAGISEL-GFX12-NEXT: s_setpc_b64 s[30:31]
-;
-; GISEL-GFX8-LABEL: workitem_zero:
-; GISEL-GFX8: ; %bb.0: ; %entry
-; GISEL-GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-GFX8-NEXT: v_and_b32_e32 v0, 0x3ff, v31
-; GISEL-GFX8-NEXT: v_bfe_u32 v1, v31, 10, 10
-; GISEL-GFX8-NEXT: v_or_b32_e32 v0, v0, v1
-; GISEL-GFX8-NEXT: v_bfe_u32 v1, v31, 20, 10
-; GISEL-GFX8-NEXT: v_or_b32_e32 v0, v0, v1
-; GISEL-GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GISEL-GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-GFX8-NEXT: s_setpc_b64 s[30:31]
+; GFX8-LABEL: workitem_zero:
+; GFX8: ; %bb.0: ; %entry
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_and_b32_e32 v0, 0x3fffffff, v31
+; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GISEL-GFX942-LABEL: workitem_zero:
-; GISEL-GFX942: ; %bb.0: ; %entry
-; GISEL-GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-GFX942-NEXT: v_and_b32_e32 v0, 0x3ff, v31
-; GISEL-GFX942-NEXT: v_bfe_u32 v1, v31, 10, 10
-; GISEL-GFX942-NEXT: v_bfe_u32 v2, v31, 20, 10
-; GISEL-GFX942-NEXT: v_or3_b32 v0, v0, v1, v2
-; GISEL-GFX942-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GISEL-GFX942-NEXT: s_nop 1
-; GISEL-GFX942-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX942-LABEL: workitem_zero:
+; GFX942: ; %bb.0: ; %entry
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_and_b32_e32 v0, 0x3fffffff, v31
+; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX942-NEXT: s_setpc_b64 s[30:31]
;
-; GISEL-GFX12-LABEL: workitem_zero:
-; GISEL-GFX12: ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/146055
More information about the llvm-branch-commits
mailing list