[llvm] [GlobalIsel] Combine zext of trunc (episode II) (PR #108305)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 11 16:40:16 PDT 2024
llvmbot wrote:
@llvm/pr-subscribers-backend-aarch64
Author: Thorsten Schütt (tschuett)
<details>
<summary>Changes</summary>
The One with the Sonogram at the End
Either replace zext(trunc(x)) with x, or, if we are actually extending zero bits, rewrite it according to the relative source and destination sizes:

SrcSize < DstSize:  zext(a & mask)
SrcSize == DstSize: a & mask
SrcSize > DstSize:  trunc(a) & mask
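
For illustration, a minimal generic-MIR sketch of the SrcSize == DstSize case (register names are hypothetical and not taken from the patch; the resulting mask matches the updated check lines in combine-extract-vec-elt.mir):

```
; Before: s64 -> s32 -> s64 round trip through G_TRUNC/G_ZEXT.
%src:_(s64) = COPY $x0
%mid:_(s32) = G_TRUNC %src(s64)
%dst:_(s64) = G_ZEXT %mid(s32)

; After: SrcSize == DstSize, so the cast pair folds to a single mask
; of the low MidSize bits (0xFFFFFFFF here).
%mask:_(s64) = G_CONSTANT i64 4294967295
%dst:_(s64) = G_AND %src, %mask
```

When the truncated bits are already known to be zero and the source and destination types match, the combine instead replaces the pair with a plain copy of the source.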
Credits: https://reviews.llvm.org/D96031, InstCombinerImpl::visitZExt, LegalizationArtifactCombiner::tryCombineZExt

Test: AMDGPU/GlobalISel/combine-zext-trunc.mir
---
Patch is 564.62 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/108305.diff
68 Files Affected:
- (modified) llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h (+4-3)
- (modified) llvm/include/llvm/Target/GlobalISel/Combine.td (+12-11)
- (modified) llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp (+3-1)
- (modified) llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp (-14)
- (modified) llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp (+91)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUCombine.td (+1-1)
- (modified) llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll (+8-10)
- (modified) llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll (+16-18)
- (modified) llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir (+2-2)
- (modified) llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir (+9-7)
- (modified) llvm/test/CodeGen/AArch64/GlobalISel/combine-with-flags.mir (+21-12)
- (modified) llvm/test/CodeGen/AArch64/GlobalISel/inline-memset.mir (+25-22)
- (modified) llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads.mir (+3-3)
- (modified) llvm/test/CodeGen/AArch64/aarch64-mops.ll (+2-4)
- (modified) llvm/test/CodeGen/AArch64/aarch64-smull.ll (+4-4)
- (modified) llvm/test/CodeGen/AArch64/addsub_ext.ll (+4-44)
- (modified) llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll (+5-6)
- (modified) llvm/test/CodeGen/AArch64/vecreduce-add.ll (+69-107)
- (modified) llvm/test/CodeGen/AArch64/zext.ll (+100-97)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/addo.ll (+39)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/addsubu64.ll (+8)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll (+10-6)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/br-constant-invalid-sgpr-copy.ll (+4)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/bswap.ll (+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-from-extend-narrow.postlegal.mir (+4-4)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-from-extend-narrow.prelegal.mir (+4-4)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/combine-zext-trunc.mir (+92-10)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll (+10-4)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll (+6)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll (+468-324)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll (+241-136)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll (+40-8)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/is-safe-to-sink-bug.ll (+10-7)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.abs.ll (+35-8)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i32.ll (+39)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i64.ll (+36)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i32.ll (+6)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i64.ll (+3)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll (+57-16)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll (+362-217)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll (+9)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll (+303-108)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll (+51-36)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll (+545-465)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll (+17-4)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll (+81-60)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll (+314-119)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.128.ll (+48-37)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.96.ll (+46-37)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/subo.ll (+39)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll (+236)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll (+4-1)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll (+161-134)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll (+13-4)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll (+236)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/vni8-across-blocks.ll (+9)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll (+9-2)
- (modified) llvm/test/CodeGen/AMDGPU/bfi_int.ll (+24)
- (modified) llvm/test/CodeGen/AMDGPU/constrained-shift.ll (+12)
- (modified) llvm/test/CodeGen/AMDGPU/fptrunc.ll (+64-23)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.wave32.ll (+33-13)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.inverse.ballot.i32.ll (+28-12)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.inverse.ballot.i64.ll (+9)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.private.ll (+13)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.shared.ll (+13)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.pops.exiting.wave.id.ll (+35-18)
- (modified) llvm/test/CodeGen/AMDGPU/pseudo-scalar-transcendental.ll (+127-56)
- (modified) llvm/test/CodeGen/AMDGPU/scratch-pointer-sink.ll (+4)
``````````diff
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 828532dcffb7d3..bf32dcf5f2c85a 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -387,9 +387,6 @@ class CombinerHelper {
/// Transform anyext(trunc(x)) to x.
bool matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg);
- /// Transform zext(trunc(x)) to x.
- bool matchCombineZextTrunc(MachineInstr &MI, Register &Reg);
-
/// Transform trunc (shl x, K) to shl (trunc x), K
/// if K < VT.getScalarSizeInBits().
///
@@ -909,6 +906,10 @@ class CombinerHelper {
bool matchCastOfBuildVector(const MachineInstr &CastMI,
const MachineInstr &BVMI, BuildFnTy &MatchInfo);
+ /// Transform zext of truncate to x or and(x, mask).
+ bool matchCombineZextTrunc(const MachineInstr &ZextMI,
+ const MachineInstr &TruncMI, BuildFnTy &MatchInfo);
+
private:
/// Checks for legality of an indexed variant of \p LdSt.
bool isIndexedLoadStoreLegal(GLoadStore &LdSt) const;
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index a595a51d7b01ff..587dbe20e94c35 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -758,15 +758,6 @@ def anyext_trunc_fold: GICombineRule <
(apply [{ Helper.replaceSingleDefInstWithReg(*${root}, ${matchinfo}); }])
>;
-// Fold (zext (trunc x)) -> x if the source type is same as the destination type
-// and truncated bits are known to be zero.
-def zext_trunc_fold: GICombineRule <
- (defs root:$root, register_matchinfo:$matchinfo),
- (match (wip_match_opcode G_ZEXT):$root,
- [{ return Helper.matchCombineZextTrunc(*${root}, ${matchinfo}); }]),
- (apply [{ Helper.replaceSingleDefInstWithReg(*${root}, ${matchinfo}); }])
->;
-
def not_cmp_fold_matchinfo : GIDefMatchData<"SmallVector<Register, 4>">;
def not_cmp_fold : GICombineRule<
(defs root:$d, not_cmp_fold_matchinfo:$info),
@@ -1894,6 +1885,15 @@ class integer_of_opcode<Instruction castOpcode> : GICombineRule <
def integer_of_truncate : integer_of_opcode<G_TRUNC>;
+/// Transform zext of truncate to x or and(x, mask).
+def zext_of_truncate : GICombineRule <
+ (defs root:$root, build_fn_matchinfo:$matchinfo),
+ (match (G_TRUNC $trunc, $src):$TruncMI,
+ (G_ZEXT $root, $trunc):$ZextMI,
+ [{ return Helper.matchCombineZextTrunc(*${ZextMI}, *${TruncMI}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFn(*${ZextMI}, ${matchinfo}); }])>;
+
+
def cast_combines: GICombineGroup<[
truncate_of_zext,
truncate_of_sext,
@@ -1915,7 +1915,8 @@ def cast_combines: GICombineGroup<[
narrow_binop_and,
narrow_binop_or,
narrow_binop_xor,
- integer_of_truncate
+ integer_of_truncate,
+ zext_of_truncate
]>;
@@ -1951,7 +1952,7 @@ def const_combines : GICombineGroup<[constant_fold_fp_ops, const_ptradd_to_i2p,
def known_bits_simplifications : GICombineGroup<[
redundant_and, redundant_sext_inreg, redundant_or, urem_pow2_to_mask,
- zext_trunc_fold, icmp_to_true_false_known_bits, icmp_to_lhs_known_bits,
+ icmp_to_true_false_known_bits, icmp_to_lhs_known_bits,
sext_inreg_to_zext_inreg]>;
def width_reduction_combines : GICombineGroup<[reduce_shl_of_extend,
diff --git a/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
index 547529bbe699ab..5addf93599085a 100644
--- a/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
@@ -333,8 +333,10 @@ MachineInstrBuilder CSEMIRBuilder::buildConstant(const DstOp &Res,
// For vectors, CSE the element only for now.
LLT Ty = Res.getLLTTy(*getMRI());
- if (Ty.isVector())
+ if (Ty.isFixedVector())
return buildSplatBuildVector(Res, buildConstant(Ty.getElementType(), Val));
+ if (Ty.isScalableVector())
+ return buildSplatVector(Res, buildConstant(Ty.getElementType(), Val));
FoldingSetNodeID ID;
GISelInstProfileBuilder ProfBuilder(ID, *getMRI());
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index df9c12bc9c97bd..14d4e413456403 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -2524,20 +2524,6 @@ bool CombinerHelper::matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) {
m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy))));
}
-bool CombinerHelper::matchCombineZextTrunc(MachineInstr &MI, Register &Reg) {
- assert(MI.getOpcode() == TargetOpcode::G_ZEXT && "Expected a G_ZEXT");
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcReg = MI.getOperand(1).getReg();
- LLT DstTy = MRI.getType(DstReg);
- if (mi_match(SrcReg, MRI,
- m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy))))) {
- unsigned DstSize = DstTy.getScalarSizeInBits();
- unsigned SrcSize = MRI.getType(SrcReg).getScalarSizeInBits();
- return KB->getKnownBits(Reg).countMinLeadingZeros() >= DstSize - SrcSize;
- }
- return false;
-}
-
static LLT getMidVTForTruncRightShiftCombine(LLT ShiftTy, LLT TruncTy) {
const unsigned ShiftSize = ShiftTy.getScalarSizeInBits();
const unsigned TruncSize = TruncTy.getScalarSizeInBits();
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp
index 30557e6a2304e6..2171f2f6feb7eb 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp
@@ -359,3 +359,94 @@ bool CombinerHelper::matchCastOfInteger(const MachineInstr &CastMI,
return false;
}
}
+
+bool CombinerHelper::matchCombineZextTrunc(const MachineInstr &ZextMI,
+ const MachineInstr &TruncMI,
+ BuildFnTy &MatchInfo) {
+ const GZext *Zext = cast<GZext>(&ZextMI);
+ const GTrunc *Trunc = cast<GTrunc>(&TruncMI);
+
+ Register Dst = Zext->getReg(0);
+ Register Mid = Zext->getSrcReg();
+ Register Src = Trunc->getSrcReg();
+
+ LLT DstTy = MRI.getType(Dst);
+ LLT SrcTy = MRI.getType(Src);
+
+ if (!MRI.hasOneNonDBGUse(Mid))
+ return false;
+
+ unsigned DstSize = DstTy.getScalarSizeInBits();
+ unsigned MidSize = MRI.getType(Mid).getScalarSizeInBits();
+ unsigned SrcSize = SrcTy.getScalarSizeInBits();
+
+ // Are the truncated bits known to be zero?
+ if (DstTy == SrcTy &&
+ (KB->getKnownBits(Src).countMinLeadingZeros() >= DstSize - MidSize)) {
+ MatchInfo = [=](MachineIRBuilder &B) { B.buildCopy(Dst, Src); };
+ return true;
+ }
+
+ // If the sizes are just right we can convert this into a logical
+ // 'and', which will be much cheaper than the pair of casts.
+
+ // If we're actually extending zero bits, then if
+ // SrcSize < DstSize: zext(Src & mask)
+ // SrcSize == DstSize: Src & mask
+ // SrcSize > DstSize: trunc(Src) & mask
+
+ if (DstSize == SrcSize) {
+ // Src & mask.
+
+ if (!isLegalOrBeforeLegalizer({TargetOpcode::G_AND, {DstTy}}) ||
+ !isConstantLegalOrBeforeLegalizer(DstTy))
+ return false;
+
+ // build mask.
+ APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
+
+ MatchInfo = [=](MachineIRBuilder &B) {
+ auto Mask = B.buildConstant(DstTy, AndValue);
+ B.buildAnd(Dst, Src, Mask);
+ };
+ return true;
+ }
+
+ if (SrcSize < DstSize) {
+ // zext(Src & mask).
+
+ if (!isLegalOrBeforeLegalizer({TargetOpcode::G_AND, {SrcTy}}) ||
+ !isConstantLegalOrBeforeLegalizer(SrcTy) ||
+ !isLegalOrBeforeLegalizer({TargetOpcode::G_ZEXT, {DstTy, SrcTy}}))
+ return false;
+
+ APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
+
+ MatchInfo = [=](MachineIRBuilder &B) {
+ auto Mask = B.buildConstant(SrcTy, AndValue);
+ auto And = B.buildAnd(SrcTy, Src, Mask);
+ B.buildZExt(Dst, And);
+ };
+ return true;
+ }
+
+ if (SrcSize > DstSize) {
+ // trunc(Src) & mask.
+
+ if (!isLegalOrBeforeLegalizer({TargetOpcode::G_AND, {DstTy}}) ||
+ !isConstantLegalOrBeforeLegalizer(DstTy) ||
+ !isLegalOrBeforeLegalizer({TargetOpcode::G_TRUNC, {DstTy, SrcTy}}))
+ return false;
+
+ APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize));
+
+ MatchInfo = [=](MachineIRBuilder &B) {
+ auto Mask = B.buildConstant(DstTy, AndValue);
+ auto Trunc = B.buildTrunc(DstTy, Src);
+ B.buildAnd(Dst, Trunc, Mask);
+ };
+ return true;
+ }
+
+ return false;
+}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
index b2a3f9392157d1..25db0e678f49ce 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
@@ -168,6 +168,6 @@ def AMDGPUPostLegalizerCombiner: GICombiner<
def AMDGPURegBankCombiner : GICombiner<
"AMDGPURegBankCombinerImpl",
[unmerge_merge, unmerge_cst, unmerge_undef,
- zext_trunc_fold, int_minmax_to_med3, ptr_add_immed_chain,
+ int_minmax_to_med3, ptr_add_immed_chain,
fp_minmax_to_clamp, fp_minmax_to_med3, fmed3_intrinsic_to_clamp]> {
}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
index de3f323891a36a..ddcc31d23b56d2 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
@@ -1938,14 +1938,14 @@ define i8 @atomicrmw_add_i8(ptr %ptr, i8 %rhs) {
define i8 @atomicrmw_xchg_i8(ptr %ptr, i8 %rhs) {
; CHECK-NOLSE-O1-LABEL: atomicrmw_xchg_i8:
; CHECK-NOLSE-O1: ; %bb.0:
-; CHECK-NOLSE-O1-NEXT: ; kill: def $w1 killed $w1 def $x1
+; CHECK-NOLSE-O1-NEXT: mov x8, x0
; CHECK-NOLSE-O1-NEXT: LBB28_1: ; %atomicrmw.start
; CHECK-NOLSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NOLSE-O1-NEXT: ldxrb w8, [x0]
-; CHECK-NOLSE-O1-NEXT: stxrb w9, w1, [x0]
+; CHECK-NOLSE-O1-NEXT: ldxrb w0, [x8]
+; CHECK-NOLSE-O1-NEXT: stxrb w9, w1, [x8]
; CHECK-NOLSE-O1-NEXT: cbnz w9, LBB28_1
; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %atomicrmw.end
-; CHECK-NOLSE-O1-NEXT: mov w0, w8
+; CHECK-NOLSE-O1-NEXT: ; kill: def $w0 killed $w0 killed $x0
; CHECK-NOLSE-O1-NEXT: ret
;
; CHECK-OUTLINE-O1-LABEL: atomicrmw_xchg_i8:
@@ -2993,14 +2993,14 @@ define i16 @atomicrmw_add_i16(ptr %ptr, i16 %rhs) {
define i16 @atomicrmw_xchg_i16(ptr %ptr, i16 %rhs) {
; CHECK-NOLSE-O1-LABEL: atomicrmw_xchg_i16:
; CHECK-NOLSE-O1: ; %bb.0:
-; CHECK-NOLSE-O1-NEXT: ; kill: def $w1 killed $w1 def $x1
+; CHECK-NOLSE-O1-NEXT: mov x8, x0
; CHECK-NOLSE-O1-NEXT: LBB38_1: ; %atomicrmw.start
; CHECK-NOLSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NOLSE-O1-NEXT: ldxrh w8, [x0]
-; CHECK-NOLSE-O1-NEXT: stxrh w9, w1, [x0]
+; CHECK-NOLSE-O1-NEXT: ldxrh w0, [x8]
+; CHECK-NOLSE-O1-NEXT: stxrh w9, w1, [x8]
; CHECK-NOLSE-O1-NEXT: cbnz w9, LBB38_1
; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %atomicrmw.end
-; CHECK-NOLSE-O1-NEXT: mov w0, w8
+; CHECK-NOLSE-O1-NEXT: ; kill: def $w0 killed $w0 killed $x0
; CHECK-NOLSE-O1-NEXT: ret
;
; CHECK-OUTLINE-O1-LABEL: atomicrmw_xchg_i16:
@@ -5996,7 +5996,6 @@ define { i8, i1 } @cmpxchg_i8(ptr %ptr, i8 %desired, i8 %new) {
; CHECK-NOLSE-O1-LABEL: cmpxchg_i8:
; CHECK-NOLSE-O1: ; %bb.0:
; CHECK-NOLSE-O1-NEXT: mov x8, x0
-; CHECK-NOLSE-O1-NEXT: ; kill: def $w2 killed $w2 def $x2
; CHECK-NOLSE-O1-NEXT: LBB67_1: ; %cmpxchg.start
; CHECK-NOLSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O1-NEXT: ldxrb w0, [x8]
@@ -6103,7 +6102,6 @@ define { i16, i1 } @cmpxchg_i16(ptr %ptr, i16 %desired, i16 %new) {
; CHECK-NOLSE-O1-LABEL: cmpxchg_i16:
; CHECK-NOLSE-O1: ; %bb.0:
; CHECK-NOLSE-O1-NEXT: mov x8, x0
-; CHECK-NOLSE-O1-NEXT: ; kill: def $w2 killed $w2 def $x2
; CHECK-NOLSE-O1-NEXT: LBB68_1: ; %cmpxchg.start
; CHECK-NOLSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O1-NEXT: ldxrh w0, [x8]
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll
index c6819ff39ed33e..c02390c4df12dd 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll
@@ -746,20 +746,20 @@ define i8 @atomicrmw_xchg_i8(ptr %ptr, i8 %rhs) {
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w1 = KILL $w1, implicit-def $x1
+ ; CHECK-NEXT: $x8 = ORRXrs $xzr, $x0, 0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1.atomicrmw.start:
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
- ; CHECK-NEXT: liveins: $x0, $x1
+ ; CHECK-NEXT: liveins: $w1, $x8
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
- ; CHECK-NEXT: early-clobber renamable $w9 = STXRB renamable $w1, renamable $x0, pcsections !0 :: (volatile store (s8) into %ir.ptr)
+ ; CHECK-NEXT: renamable $w0 = LDXRB renamable $x8, implicit-def $x0, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+ ; CHECK-NEXT: early-clobber renamable $w9 = STXRB renamable $w1, renamable $x8, pcsections !0 :: (volatile store (s8) into %ir.ptr)
; CHECK-NEXT: CBNZW killed renamable $w9, %bb.1, pcsections !0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2.atomicrmw.end:
- ; CHECK-NEXT: liveins: $x8
+ ; CHECK-NEXT: liveins: $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: $w0 = ORRWrs $wzr, $w8, 0, implicit killed $x8
+ ; CHECK-NEXT: $w0 = KILL renamable $w0, implicit killed $x0
; CHECK-NEXT: RET undef $lr, implicit $w0
%res = atomicrmw xchg ptr %ptr, i8 %rhs monotonic, !pcsections !0
ret i8 %res
@@ -999,20 +999,20 @@ define i16 @atomicrmw_xchg_i16(ptr %ptr, i16 %rhs) {
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w1 = KILL $w1, implicit-def $x1
+ ; CHECK-NEXT: $x8 = ORRXrs $xzr, $x0, 0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1.atomicrmw.start:
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
- ; CHECK-NEXT: liveins: $x0, $x1
+ ; CHECK-NEXT: liveins: $w1, $x8
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
- ; CHECK-NEXT: early-clobber renamable $w9 = STXRH renamable $w1, renamable $x0, pcsections !0 :: (volatile store (s16) into %ir.ptr)
+ ; CHECK-NEXT: renamable $w0 = LDXRH renamable $x8, implicit-def $x0, pcsections !0 :: (volatile load (s16) from %ir.ptr)
+ ; CHECK-NEXT: early-clobber renamable $w9 = STXRH renamable $w1, renamable $x8, pcsections !0 :: (volatile store (s16) into %ir.ptr)
; CHECK-NEXT: CBNZW killed renamable $w9, %bb.1, pcsections !0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2.atomicrmw.end:
- ; CHECK-NEXT: liveins: $x8
+ ; CHECK-NEXT: liveins: $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: $w0 = ORRWrs $wzr, $w8, 0, implicit killed $x8
+ ; CHECK-NEXT: $w0 = KILL renamable $w0, implicit killed $x0
; CHECK-NEXT: RET undef $lr, implicit $w0
%res = atomicrmw xchg ptr %ptr, i16 %rhs monotonic, !pcsections !0
ret i16 %res
@@ -1229,11 +1229,10 @@ define { i8, i1 } @cmpxchg_i8(ptr %ptr, i8 %desired, i8 %new) {
; CHECK-NEXT: liveins: $w1, $w2, $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $x8 = ORRXrs $xzr, $x0, 0
- ; CHECK-NEXT: renamable $w2 = KILL $w2, implicit-def $x2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1.cmpxchg.start:
; CHECK-NEXT: successors: %bb.2(0x7c000000), %bb.4(0x04000000)
- ; CHECK-NEXT: liveins: $w1, $x2, $x8
+ ; CHECK-NEXT: liveins: $w1, $w2, $x8
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $w0 = LDXRB renamable $x8, implicit-def $x0, pcsections !0 :: (volatile load (s8) from %ir.ptr)
; CHECK-NEXT: renamable $w9 = ANDWri renamable $w0, 7, pcsections !0
@@ -1242,7 +1241,7 @@ define { i8, i1 } @cmpxchg_i8(ptr %ptr, i8 %desired, i8 %new) {
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2.cmpxchg.trystore:
; CHECK-NEXT: successors: %bb.3(0x04000000), %bb.1(0x7c000000)
- ; CHECK-NEXT: liveins: $w1, $x0, $x2, $x8
+ ; CHECK-NEXT: liveins: $w1, $w2, $x0, $x8
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: early-clobber renamable $w9 = STXRB renamable $w2, renamable $x8, pcsections !0 :: (volatile store (s8) into %ir.ptr)
; CHECK-NEXT: CBNZW killed renamable $w9, %bb.1
@@ -1272,11 +1271,10 @@ define { i16, i1 } @cmpxchg_i16(ptr %ptr, i16 %desired, i16 %new) {
; CHECK-NEXT: liveins: $w1, $w2, $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $x8 = ORRXrs $xzr, $x0, 0
- ; CHECK-NEXT: renamable $w2 = KILL $w2, implicit-def $x2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1.cmpxchg.start:
; CHECK-NEXT: successors: %bb.2(0x7c000000), %bb.4(0x04000000)
- ; CHECK-NEXT: liveins: $w1, $x2, $x8
+ ; CHECK-NEXT: liveins: $w1, $w2, $x8
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $w0 = LDXRH renamable $x8, implicit-def $x0, pcsections !0 :: (volatile load (s16) from %ir.ptr)
; CHECK-NEXT: renamable $w9 = ANDWri renamable $w0, 15, pcsections !0
@@ -1285,7 +1283,7 @@ define { i16, i1 } @cmpxchg_i16(ptr %ptr, i16 %desired, i16 %new) {
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2.cmpxchg.trystore:
; CHECK-NEXT: successors: %bb.3(0x04000000), %bb.1(0x7c000000)
- ; CHECK-NEXT: liveins: $w1, $x0, $x2, $x8
+ ; CHECK-NEXT: liveins: $w1, $w2, $x0, $x8
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: early-clobber renamable $w9 = STXRH renamable $w2, renamable $x8, pcsections !0 :: (volatile store (s16) into %ir.ptr)
; CHECK-NEXT: CBNZW killed renamable $w9, %bb.1
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir
index c98dcf6ccb7966..f29fa86123c8c4 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir
@@ -49,8 +49,8 @@ body: |
; CHECK: liveins: $x0, $x1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %arg1:_(s64) = COPY $x0
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %arg1(s64)
- ; CHECK-NEXT: %zext:_(s64) = G_ZEXT [[TRUNC]](s32)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295
+ ; CHECK-NEXT: %zext:_(s64) = G_AND %arg1, [[C]]
; CHECK-NEXT: $x0 = COPY %zext(s64)
; CHECK-NEXT: RET_ReallyLR implicit $x0
%arg1:_(s64) = COPY $x0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir
index 86fa12aa064acb..3e98a5e8e88009 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir
@@ -361,10 +361,11 @@ body: |
; CHECK: liveins: $x0, $x1, $x2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
- ; CHECK-NEXT: %c:_(s1) = G_TRUNC [[COPY]](s64)
; CHECK-NEXT: %one:_(s8) = G_CONSTANT i8 101
- ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s8) = G_ZEXT %c(s1)
- ; CHECK-NEXT: %sel:_(s8) = G_ADD [[ZEXT]], %one
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 1
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s64)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s8) = G_AND [[TRUNC]], [[C]]
+ ; CHECK-NEXT: %sel:_(s8) = G_ADD [[AND]], %one
; CHECK-NEXT: %ext:_(s32) = G_ANYEXT %sel(s8)
; CHECK-NEXT: $w0 = COPY %ext(s32)
%0:_(s64) = COPY $x0
@@ -417,10 +418,11 @@ body: |
; CHECK: liveins: $x0, $x1, $x2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
- ; CHECK-NEXT: %c:_(s1) = G_TRUNC [[COPY]](s64)
- ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s8) = G_ZEXT %c(s1)
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 6
- ; CHECK-NEXT: %sel:_(s8) = G_SHL [[ZEXT]], [[C]](s8)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 1
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s64)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s8) = G_AND [[TRUNC]], [[C]]
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s8) = G_CONSTANT i8 6
+ ; CHECK-NEXT: %sel:_(s8) = G_SHL [[AND]...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/108305