[llvm] [GlobalIsel] Combine freeze (PR #93239)
Thorsten Schütt via llvm-commits
llvm-commits at lists.llvm.org
Sat May 25 08:41:17 PDT 2024
https://github.com/tschuett updated https://github.com/llvm/llvm-project/pull/93239
>From 95486e7c020c3d368c9a58f6b98fe29f7c449171 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= <schuett at gmail.com>
Date: Thu, 23 May 2024 18:40:55 +0200
Subject: [PATCH 1/6] [GlobalIsel] Combine freeze
---
.../include/llvm/Target/GlobalISel/Combine.td | 16 ++-
llvm/lib/CodeGen/GlobalISel/Utils.cpp | 102 ++++++++++++++++--
2 files changed, 105 insertions(+), 13 deletions(-)
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 8012f91922777..40a6d69f7e372 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1713,6 +1713,17 @@ def integer_reassoc_combines: GICombineGroup<[
APlusBMinusCPlusA
]>;
+def freeze_of_not_undef_and_poison : GICombineRule< // Drops a G_FREEZE whose source already passes isGuaranteedNotToBeUndefOrPoison.
+ (defs root:$root),
+ (match (G_FREEZE $root, $src),
+ [{ return isGuaranteedNotToBeUndefOrPoison(${src}.getReg(), MRI); }]),
+ (apply (GIReplaceReg $root, $src))>; // Uses of the freeze result are rewritten to use $src directly.
+
+def freeze_combines: GICombineGroup<[ // Groups the G_FREEZE-related combine rules so they can be referenced as one unit.
+ freeze_of_not_undef_and_poison,
+ push_freeze_to_prevent_poison_from_propagating
+]>;
+
// FIXME: These should use the custom predicate feature once it lands.
def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
undef_to_negative_one,
@@ -1771,7 +1782,7 @@ def constant_fold_binops : GICombineGroup<[constant_fold_binop,
constant_fold_fp_binop]>;
def all_combines : GICombineGroup<[integer_reassoc_combines, trivial_combines,
- vector_ops_combines,
+ vector_ops_combines, freeze_combines,
insert_vec_elt_combines, extract_vec_elt_combines, combines_for_extload,
combine_extracted_vector_load,
undef_combines, identity_combines, phi_combines,
@@ -1793,8 +1804,7 @@ def all_combines : GICombineGroup<[integer_reassoc_combines, trivial_combines,
sub_add_reg, select_to_minmax, redundant_binop_in_equality,
fsub_to_fneg, commute_constant_to_rhs, match_ands, match_ors,
combine_concat_vector, double_icmp_zero_and_or_combine, match_addos,
- sext_trunc, zext_trunc, combine_shuffle_concat,
- push_freeze_to_prevent_poison_from_propagating]>;
+ sext_trunc, zext_trunc, combine_shuffle_concat]>;
// A combine group used to for prelegalizer combiners at -O0. The combines in
// this group have been selected based on experiments to balance code size and
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index f455482e02943..93a76fde6ab27 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -1724,6 +1724,39 @@ bool llvm::isPreISelGenericFloatingPointOpcode(unsigned Opc) {
}
}
+/// Returns true only if \p ShiftAmount is a known constant (or a G_BUILD_VECTOR of known constants) strictly below the scalar bit width; shifts by an amount equal to or larger than the bitwidth return poison.
+static bool shiftAmountKnownInRange(Register ShiftAmount,
+ const MachineRegisterInfo &MRI) {
+ LLT Ty = MRI.getType(ShiftAmount); // NOTE(review): this is the type of the shift-amount register, not of the shifted value — confirm both always have the same scalar width.
+
+ if (Ty.isScalableVector())
+ return false; // Can't tell for scalable vectors; conservatively report "not known in range".
+
+ if (Ty.isScalar()) {
+ std::optional<ValueAndVReg> Val =
+ getIConstantVRegValWithLookThrough(ShiftAmount, MRI);
+ if (!Val)
+ return false; // No constant value was found for the amount.
+ return Val->Value.ult(Ty.getScalarSizeInBits()); // Unsigned compare: in range iff amount < bit width.
+ }
+
+ GBuildVector *BV = getOpcodeDef<GBuildVector>(ShiftAmount, MRI);
+ if (!BV)
+ return false; // Only vectors for which a GBuildVector definition is found are analysed.
+
+ unsigned Sources = BV->getNumSources();
+ for (unsigned I = 0; I < Sources; ++I) {
+ std::optional<ValueAndVReg> Val =
+ getIConstantVRegValWithLookThrough(BV->getSourceReg(I), MRI);
+ if (!Val)
+ return false; // This source has no known constant value.
+ if (!Val->Value.ult(Ty.getScalarSizeInBits()))
+ return false; // This source's amount is >= the scalar bit width.
+ }
+
+ return true;
+}
+
namespace {
enum class UndefPoisonKind {
PoisonOnly = (1 << 0),
@@ -1732,11 +1765,11 @@ enum class UndefPoisonKind {
};
}
-[[maybe_unused]] static bool includesPoison(UndefPoisonKind Kind) {
+static bool includesPoison(UndefPoisonKind Kind) {
return (unsigned(Kind) & unsigned(UndefPoisonKind::PoisonOnly)) != 0;
}
-[[maybe_unused]] static bool includesUndef(UndefPoisonKind Kind) {
+static bool includesUndef(UndefPoisonKind Kind) {
return (unsigned(Kind) & unsigned(UndefPoisonKind::UndefOnly)) != 0;
}
@@ -1745,18 +1778,55 @@ static bool canCreateUndefOrPoison(Register Reg, const MachineRegisterInfo &MRI,
UndefPoisonKind Kind) {
MachineInstr *RegDef = MRI.getVRegDef(Reg);
- if (auto *GMI = dyn_cast<GenericMachineInstr>(RegDef)) {
- if (ConsiderFlagsAndMetadata && includesPoison(Kind) &&
- GMI->hasPoisonGeneratingFlags())
- return true;
- } else {
- // Conservatively return true.
- return true;
- }
+ if (ConsiderFlagsAndMetadata && includesPoison(Kind))
+ if (auto *GMI = dyn_cast<GenericMachineInstr>(RegDef))
+ if (GMI->hasPoisonGeneratingFlags())
+ return true;
+ // Check whether opcode is a poison/undef-generating operation.
switch (RegDef->getOpcode()) {
case TargetOpcode::G_FREEZE:
+ case TargetOpcode::G_BUILD_VECTOR:
+ case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
return false;
+ case TargetOpcode::G_SHL:
+ case TargetOpcode::G_ASHR:
+ case TargetOpcode::G_LSHR:
+ return includesPoison(Kind) &&
+ !shiftAmountKnownInRange(RegDef->getOperand(2).getReg(), MRI);
+ case TargetOpcode::G_FPTOSI:
+ case TargetOpcode::G_FPTOUI:
+ // fptosi/ui yields poison if the resulting value does not fit in the
+ // destination type.
+ return true;
+ case TargetOpcode::G_CTLZ:
+ case TargetOpcode::G_CTTZ:
+ case TargetOpcode::G_ABS:
+ case TargetOpcode::G_CTPOP:
+ case TargetOpcode::G_BSWAP:
+ case TargetOpcode::G_BITREVERSE:
+ case TargetOpcode::G_FSHL:
+ case TargetOpcode::G_FSHR:
+ case TargetOpcode::G_SMAX:
+ case TargetOpcode::G_SMIN:
+ case TargetOpcode::G_UMAX:
+ case TargetOpcode::G_UMIN:
+ case TargetOpcode::G_PTRMASK:
+ case TargetOpcode::G_SADDO:
+ case TargetOpcode::G_SSUBO:
+ case TargetOpcode::G_UADDO:
+ case TargetOpcode::G_USUBO:
+ case TargetOpcode::G_SMULO:
+ case TargetOpcode::G_UMULO:
+ case TargetOpcode::G_SADDSAT:
+ case TargetOpcode::G_UADDSAT:
+ case TargetOpcode::G_SSUBSAT:
+ case TargetOpcode::G_USUBSAT:
+ return false;
+ case TargetOpcode::G_SSHLSAT:
+ case TargetOpcode::G_USHLSAT:
+ return includesPoison(Kind) &&
+ !shiftAmountKnownInRange(RegDef->getOperand(2).getReg(), MRI);
default:
return !isa<GCastOp>(RegDef) && !isa<GBinOp>(RegDef);
}
@@ -1776,6 +1846,18 @@ static bool isGuaranteedNotToBeUndefOrPoison(Register Reg,
return true;
case TargetOpcode::G_IMPLICIT_DEF:
return !includesUndef(Kind);
+ case TargetOpcode::G_CONSTANT:
+ case TargetOpcode::G_FCONSTANT:
+ return false;
+ case TargetOpcode::G_BUILD_VECTOR: {
+ GBuildVector *BV = cast<GBuildVector>(RegDef);
+ unsigned NumSources = BV->getNumSources();
+ for (unsigned I = 0; I < NumSources; ++I)
+ if (!::isGuaranteedNotToBeUndefOrPoison(BV->getSourceReg(I), MRI,
+ Depth + 1, Kind))
+ return false;
+ return true;
+ }
default: {
auto MOCheck = [&](const MachineOperand &MO) {
if (!MO.isReg())
>From 14c2b5d692746347cf7a100d007d7aa331f04770 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= <schuett at gmail.com>
Date: Thu, 23 May 2024 18:48:46 +0200
Subject: [PATCH 2/6] mend
---
.../AArch64/GlobalISel/combine-freeze.mir | 656 ++++++++++++++++++
1 file changed, 656 insertions(+)
create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/combine-freeze.mir
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-freeze.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-freeze.mir
new file mode 100644
index 0000000000000..10df96bc73ed7
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-freeze.mir
@@ -0,0 +1,656 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -o - -mtriple=aarch64-unknown-unknown -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s | FileCheck %s
+
+...
+---
+name: freeze_register
+body: |
+ bb.1:
+ liveins: $w0
+
+ ; CHECK-LABEL: name: freeze_register
+ ; CHECK: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+ ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[COPY]]
+ ; CHECK-NEXT: $x0 = COPY [[FREEZE]](s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %0:_(s64) = COPY $x0
+ %2:_(s64) = G_FREEZE %0
+ $x0 = COPY %2(s64)
+ RET_ReallyLR implicit $x0
+
+...
+---
+name: freeze_constant
+body: |
+ bb.1:
+ liveins: $w0
+
+ ; CHECK-LABEL: name: freeze_constant
+ ; CHECK: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 9
+ ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[C]]
+ ; CHECK-NEXT: $x0 = COPY [[FREEZE]](s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %0:_(s64) = COPY $x0
+ %1:_(s64) = G_CONSTANT i64 9
+ %2:_(s64) = G_FREEZE %1
+ $x0 = COPY %2(s64)
+ RET_ReallyLR implicit $x0
+
+...
+---
+name: freeze_fconstant
+body: |
+ bb.1:
+ liveins: $w0
+
+ ; CHECK-LABEL: name: freeze_fconstant
+ ; CHECK: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 9.000000e+00
+ ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[C]]
+ ; CHECK-NEXT: $x0 = COPY [[FREEZE]](s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %0:_(s64) = COPY $x0
+ %1:_(s64) = G_FCONSTANT double 9.0
+ %2:_(s64) = G_FREEZE %1
+ $x0 = COPY %2(s64)
+ RET_ReallyLR implicit $x0
+...
+---
+name: freeze_undef
+body: |
+ bb.1:
+ liveins: $w0
+
+ ; CHECK-LABEL: name: freeze_undef
+ ; CHECK: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[DEF]]
+ ; CHECK-NEXT: $x0 = COPY [[FREEZE]](s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %0:_(s64) = COPY $x0
+ %1:_(s64) = G_IMPLICIT_DEF
+ %2:_(s64) = G_FREEZE %1
+ $x0 = COPY %2(s64)
+ RET_ReallyLR implicit $x0
+...
+---
+name: freeze_freeze
+body: |
+ bb.1:
+ liveins: $w0
+
+ ; CHECK-LABEL: name: freeze_freeze
+ ; CHECK: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+ ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[COPY]]
+ ; CHECK-NEXT: $x0 = COPY [[FREEZE]](s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %0:_(s64) = COPY $x0
+ %1:_(s64) = G_FREEZE %0
+ %2:_(s64) = G_FREEZE %1
+ $x0 = COPY %2(s64)
+ RET_ReallyLR implicit $x0
+...
+---
+name: freeze_buildvector
+body: |
+ bb.1:
+ liveins: $w0
+
+ ; CHECK-LABEL: name: freeze_buildvector
+ ; CHECK: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32)
+ ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<4 x s32>) = G_FREEZE [[BUILD_VECTOR]]
+ ; CHECK-NEXT: $q0 = COPY [[FREEZE]](<4 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
+ %0:_(s32) = COPY $w0
+ %1:_(<4 x s32>) = G_BUILD_VECTOR %0(s32), %0(s32), %0(s32), %0(s32)
+ %2:_(<4 x s32>) = G_FREEZE %1
+ $q0 = COPY %2(<4 x s32>)
+ RET_ReallyLR implicit $q0
+...
+---
+name: freeze_buildvector_const
+body: |
+ bb.1:
+ liveins: $w0
+
+ ; CHECK-LABEL: name: freeze_buildvector_const
+ ; CHECK: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %c:_(s32) = G_CONSTANT i32 6
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR %c(s32), %c(s32), %c(s32), %c(s32)
+ ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<4 x s32>) = G_FREEZE [[BUILD_VECTOR]]
+ ; CHECK-NEXT: $q0 = COPY [[FREEZE]](<4 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
+ %0:_(s32) = COPY $w0
+ %c:_(s32) = G_CONSTANT i32 6
+ %1:_(<4 x s32>) = G_BUILD_VECTOR %c(s32), %c(s32), %c(s32), %c(s32)
+ %2:_(<4 x s32>) = G_FREEZE %1
+ $q0 = COPY %2(<4 x s32>)
+ RET_ReallyLR implicit $q0
+...
+---
+name: freeze_disjoint_or_fold_barrier
+body: |
+ bb.1:
+ liveins: $w0
+
+ ; CHECK-LABEL: name: freeze_disjoint_or_fold_barrier
+ ; CHECK: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
+ ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE %cst
+ ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER [[FREEZE]]
+ ; CHECK-NEXT: $x0 = COPY %c(s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %0:_(s64) = COPY $x0
+ %cst:_(s64) = G_CONSTANT i64 9
+ %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+ %1:_(s64) = disjoint G_OR %c, %c
+ %2:_(s64) = G_FREEZE %1
+ $x0 = COPY %2(s64)
+ RET_ReallyLR implicit $x0
+...
+---
+name: freeze_or_fold_barrier
+body: |
+ bb.1:
+ liveins: $w0
+
+ ; CHECK-LABEL: name: freeze_or_fold_barrier
+ ; CHECK: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
+ ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE %cst
+ ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER [[FREEZE]]
+ ; CHECK-NEXT: $x0 = COPY %c(s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %0:_(s64) = COPY $x0
+ %cst:_(s64) = G_CONSTANT i64 9
+ %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+ %1:_(s64) = G_OR %c, %c
+ %2:_(s64) = G_FREEZE %1
+ $x0 = COPY %2(s64)
+ RET_ReallyLR implicit $x0
+...
+---
+name: freeze_nneg_zext_fold_barrier
+body: |
+ bb.1:
+ liveins: $w0
+
+ ; CHECK-LABEL: name: freeze_nneg_zext_fold_barrier
+ ; CHECK: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %cst:_(s32) = G_CONSTANT i32 9
+ ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s32) = G_FREEZE %cst
+ ; CHECK-NEXT: %c:_(s32) = G_CONSTANT_FOLD_BARRIER [[FREEZE]]
+ ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT %c(s32)
+ ; CHECK-NEXT: $x0 = COPY [[ZEXT]](s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %0:_(s64) = COPY $x0
+ %cst:_(s32) = G_CONSTANT i32 9
+ %c:_(s32) = G_CONSTANT_FOLD_BARRIER %cst
+ %1:_(s64) = nneg G_ZEXT %c
+ %2:_(s64) = G_FREEZE %1
+ $x0 = COPY %2(s64)
+ RET_ReallyLR implicit $x0
+...
+---
+name: freeze_zext_fold_barrier
+body: |
+ bb.1:
+ liveins: $w0
+
+ ; CHECK-LABEL: name: freeze_zext_fold_barrier
+ ; CHECK: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %cst:_(s32) = G_CONSTANT i32 9
+ ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s32) = G_FREEZE %cst
+ ; CHECK-NEXT: %c:_(s32) = G_CONSTANT_FOLD_BARRIER [[FREEZE]]
+ ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT %c(s32)
+ ; CHECK-NEXT: $x0 = COPY [[ZEXT]](s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %0:_(s64) = COPY $x0
+ %cst:_(s32) = G_CONSTANT i32 9
+ %c:_(s32) = G_CONSTANT_FOLD_BARRIER %cst
+ %1:_(s64) = G_ZEXT %c
+ %2:_(s64) = G_FREEZE %1
+ $x0 = COPY %2(s64)
+ RET_ReallyLR implicit $x0
+...
+---
+name: freeze_udiv_fold_barrier
+body: |
+ bb.1:
+ liveins: $w0
+
+ ; CHECK-LABEL: name: freeze_udiv_fold_barrier
+ ; CHECK: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
+ ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+ ; CHECK-NEXT: [[UDIV:%[0-9]+]]:_(s64) = G_UDIV %c, %c
+ ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[UDIV]]
+ ; CHECK-NEXT: $x0 = COPY [[FREEZE]](s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %0:_(s64) = COPY $x0
+ %cst:_(s64) = G_CONSTANT i64 9
+ %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+ %1:_(s64) = G_UDIV %c, %c
+ %2:_(s64) = G_FREEZE %1
+ $x0 = COPY %2(s64)
+ RET_ReallyLR implicit $x0
+...
+---
+name: freeze_exact_udiv_fold_barrier
+body: |
+ bb.1:
+ liveins: $w0
+
+ ; CHECK-LABEL: name: freeze_exact_udiv_fold_barrier
+ ; CHECK: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
+ ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+ ; CHECK-NEXT: [[UDIV:%[0-9]+]]:_(s64) = exact G_UDIV %c, %c
+ ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[UDIV]]
+ ; CHECK-NEXT: $x0 = COPY [[FREEZE]](s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %0:_(s64) = COPY $x0
+ %cst:_(s64) = G_CONSTANT i64 9
+ %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+ %1:_(s64) = exact G_UDIV %c, %c
+ %2:_(s64) = G_FREEZE %1
+ $x0 = COPY %2(s64)
+ RET_ReallyLR implicit $x0
+...
+---
+name: freeze_mul_fold_barrier
+body: |
+ bb.1:
+ liveins: $w0
+
+ ; CHECK-LABEL: name: freeze_mul_fold_barrier
+ ; CHECK: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
+ ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+ ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL %c, %c
+ ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[MUL]]
+ ; CHECK-NEXT: $x0 = COPY [[FREEZE]](s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %0:_(s64) = COPY $x0
+ %cst:_(s64) = G_CONSTANT i64 9
+ %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+ %1:_(s64) = G_MUL %c, %c
+ %2:_(s64) = G_FREEZE %1
+ $x0 = COPY %2(s64)
+ RET_ReallyLR implicit $x0
+...
+---
+name: freeze_nsw_mul_fold_barrier
+body: |
+ bb.1:
+ liveins: $w0
+
+ ; CHECK-LABEL: name: freeze_nsw_mul_fold_barrier
+ ; CHECK: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
+ ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+ ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = nsw G_MUL %c, %c
+ ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[MUL]]
+ ; CHECK-NEXT: $x0 = COPY [[FREEZE]](s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %0:_(s64) = COPY $x0
+ %cst:_(s64) = G_CONSTANT i64 9
+ %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+ %1:_(s64) = nsw G_MUL %c, %c
+ %2:_(s64) = G_FREEZE %1
+ $x0 = COPY %2(s64)
+ RET_ReallyLR implicit $x0
+...
+---
+name: freeze_trunc_fold_barrier
+body: |
+ bb.1:
+ liveins: $w0
+
+ ; CHECK-LABEL: name: freeze_trunc_fold_barrier
+ ; CHECK: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
+ ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE %cst
+ ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER [[FREEZE]]
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %c(s64)
+ ; CHECK-NEXT: $w0 = COPY [[TRUNC]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
+ %0:_(s64) = COPY $x0
+ %cst:_(s64) = G_CONSTANT i64 9
+ %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+ %1:_(s32) = G_TRUNC %c
+ %2:_(s32) = G_FREEZE %1
+ $w0 = COPY %2(s32)
+ RET_ReallyLR implicit $q0
+...
+---
+name: freeze_nuw_trunc_fold_barrier
+body: |
+ bb.1:
+ liveins: $w0
+
+ ; CHECK-LABEL: name: freeze_nuw_trunc_fold_barrier
+ ; CHECK: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
+ ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE %cst
+ ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER [[FREEZE]]
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %c(s64)
+ ; CHECK-NEXT: $w0 = COPY [[TRUNC]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
+ %0:_(s64) = COPY $x0
+ %cst:_(s64) = G_CONSTANT i64 9
+ %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+ %1:_(s32) = nuw G_TRUNC %c
+ %2:_(s32) = G_FREEZE %1
+ $w0 = COPY %2(s32)
+ RET_ReallyLR implicit $q0
+...
+---
+name: freeze_add_fold_barrier
+body: |
+ bb.1:
+ liveins: $w0
+
+ ; CHECK-LABEL: name: freeze_add_fold_barrier
+ ; CHECK: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
+ ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD %c, %c
+ ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[ADD]]
+ ; CHECK-NEXT: $x0 = COPY [[FREEZE]](s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %0:_(s64) = COPY $x0
+ %cst:_(s64) = G_CONSTANT i64 9
+ %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+ %1:_(s64) = G_ADD %c, %c
+ %2:_(s64) = G_FREEZE %1
+ $x0 = COPY %2(s64)
+ RET_ReallyLR implicit $x0
+...
+---
+name: freeze_nuw_add_fold_barrier
+body: |
+ bb.1:
+ liveins: $w0
+
+ ; CHECK-LABEL: name: freeze_nuw_add_fold_barrier
+ ; CHECK: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
+ ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = nuw G_ADD %c, %c
+ ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[ADD]]
+ ; CHECK-NEXT: $x0 = COPY [[FREEZE]](s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %0:_(s64) = COPY $x0
+ %cst:_(s64) = G_CONSTANT i64 9
+ %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+ %1:_(s64) = nuw G_ADD %c, %c
+ %2:_(s64) = G_FREEZE %1
+ $x0 = COPY %2(s64)
+ RET_ReallyLR implicit $x0
+...
+---
+name: freeze_xor_fold_barrier
+body: |
+ bb.1:
+ liveins: $w0
+
+ ; CHECK-LABEL: name: freeze_xor_fold_barrier
+ ; CHECK: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
+ ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+ ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR %c, %c
+ ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[XOR]]
+ ; CHECK-NEXT: $x0 = COPY [[FREEZE]](s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %0:_(s64) = COPY $x0
+ %cst:_(s64) = G_CONSTANT i64 9
+ %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+ %1:_(s64) = G_XOR %c, %c
+ %2:_(s64) = G_FREEZE %1
+ $x0 = COPY %2(s64)
+ RET_ReallyLR implicit $x0
+...
+---
+name: freeze_fptosi_fold_barrier
+body: |
+ bb.1:
+ liveins: $w0
+
+ ; CHECK-LABEL: name: freeze_fptosi_fold_barrier
+ ; CHECK: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
+ ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+ ; CHECK-NEXT: [[FPTOSI:%[0-9]+]]:_(s64) = G_FPTOSI %c(s64)
+ ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[FPTOSI]]
+ ; CHECK-NEXT: $x0 = COPY [[FREEZE]](s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %0:_(s64) = COPY $x0
+ %cst:_(s64) = G_CONSTANT i64 9
+ %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+ %1:_(s64) = G_FPTOSI %c
+ %2:_(s64) = G_FREEZE %1
+ $x0 = COPY %2(s64)
+ RET_ReallyLR implicit $x0
+...
+---
+name: freeze_fptoui_fold_barrier
+body: |
+ bb.1:
+ liveins: $w0
+
+ ; CHECK-LABEL: name: freeze_fptoui_fold_barrier
+ ; CHECK: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
+ ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+ ; CHECK-NEXT: [[FPTOUI:%[0-9]+]]:_(s64) = G_FPTOUI %c(s64)
+ ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[FPTOUI]]
+ ; CHECK-NEXT: $x0 = COPY [[FREEZE]](s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %0:_(s64) = COPY $x0
+ %cst:_(s64) = G_CONSTANT i64 9
+ %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+ %1:_(s64) = G_FPTOUI %c
+ %2:_(s64) = G_FREEZE %1
+ $x0 = COPY %2(s64)
+ RET_ReallyLR implicit $x0
+...
+---
+name: freeze_shl_fold_barrier
+body: |
+ bb.1:
+ liveins: $w0
+
+ ; CHECK-LABEL: name: freeze_shl_fold_barrier
+ ; CHECK: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
+ ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL %c, %c(s64)
+ ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[SHL]]
+ ; CHECK-NEXT: $x0 = COPY [[FREEZE]](s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %0:_(s64) = COPY $x0
+ %cst:_(s64) = G_CONSTANT i64 9
+ %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+ %1:_(s64) = G_SHL %c, %c
+ %2:_(s64) = G_FREEZE %1
+ $x0 = COPY %2(s64)
+ RET_ReallyLR implicit $x0
+...
+---
+name: freeze_ctlz_fold_barrier
+body: |
+ bb.1:
+ liveins: $w0
+
+ ; CHECK-LABEL: name: freeze_ctlz_fold_barrier
+ ; CHECK: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
+ ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE %cst
+ ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER [[FREEZE]]
+ ; CHECK-NEXT: [[CTLZ:%[0-9]+]]:_(s64) = G_CTLZ %c(s64)
+ ; CHECK-NEXT: $x0 = COPY [[CTLZ]](s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %0:_(s64) = COPY $x0
+ %cst:_(s64) = G_CONSTANT i64 9
+ %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+ %1:_(s64) = G_CTLZ %c
+ %2:_(s64) = G_FREEZE %1
+ $x0 = COPY %2(s64)
+ RET_ReallyLR implicit $x0
+...
+---
+name: freeze_cttz_fold_barrier
+body: |
+ bb.1:
+ liveins: $w0
+
+ ; CHECK-LABEL: name: freeze_cttz_fold_barrier
+ ; CHECK: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
+ ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE %cst
+ ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER [[FREEZE]]
+ ; CHECK-NEXT: [[CTTZ:%[0-9]+]]:_(s64) = G_CTTZ %c(s64)
+ ; CHECK-NEXT: $x0 = COPY [[CTTZ]](s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %0:_(s64) = COPY $x0
+ %cst:_(s64) = G_CONSTANT i64 9
+ %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+ %1:_(s64) = G_CTTZ %c
+ %2:_(s64) = G_FREEZE %1
+ $x0 = COPY %2(s64)
+ RET_ReallyLR implicit $x0
+...
+---
+name: freeze_bswap_fold_barrier
+body: |
+ bb.1:
+ liveins: $w0
+
+ ; CHECK-LABEL: name: freeze_bswap_fold_barrier
+ ; CHECK: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
+ ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE %cst
+ ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER [[FREEZE]]
+ ; CHECK-NEXT: [[BSWAP:%[0-9]+]]:_(s64) = G_BSWAP %c
+ ; CHECK-NEXT: $x0 = COPY [[BSWAP]](s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %0:_(s64) = COPY $x0
+ %cst:_(s64) = G_CONSTANT i64 9
+ %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+ %1:_(s64) = G_BSWAP %c
+ %2:_(s64) = G_FREEZE %1
+ $x0 = COPY %2(s64)
+ RET_ReallyLR implicit $x0
+...
+---
+name: freeze_icmp_fold_barrier
+body: |
+ bb.1:
+ liveins: $w0
+
+ ; CHECK-LABEL: name: freeze_icmp_fold_barrier
+ ; CHECK: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
+ ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+ ; CHECK-NEXT: %d:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+ ; CHECK-NEXT: %cmp:_(s1) = G_ICMP intpred(eq), %c(s64), %d
+ ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s1) = G_FREEZE %cmp
+ ; CHECK-NEXT: %ext:_(s64) = G_ZEXT [[FREEZE]](s1)
+ ; CHECK-NEXT: $x0 = COPY %ext(s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %0:_(s64) = COPY $x0
+ %cst:_(s64) = G_CONSTANT i64 9
+ %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+ %d:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+ %cmp:_(s1) = G_ICMP intpred(eq), %c(s64), %d
+ %2:_(s1) = G_FREEZE %cmp
+ %ext:_(s64) = G_ZEXT %2(s1)
+ $x0 = COPY %ext(s64)
+ RET_ReallyLR implicit $x0
+...
+---
+name: freeze_fcmp_fold_barrier
+body: |
+ bb.1:
+ liveins: $w0
+
+ ; CHECK-LABEL: name: freeze_fcmp_fold_barrier
+ ; CHECK: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
+ ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+ ; CHECK-NEXT: %d:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+ ; CHECK-NEXT: %cmp:_(s1) = G_FCMP floatpred(oeq), %c(s64), %d
+ ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s1) = G_FREEZE %cmp
+ ; CHECK-NEXT: %ext:_(s64) = G_ZEXT [[FREEZE]](s1)
+ ; CHECK-NEXT: $x0 = COPY %ext(s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %0:_(s64) = COPY $x0
+ %cst:_(s64) = G_CONSTANT i64 9
+ %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+ %d:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+ %cmp:_(s1) = G_FCMP floatpred(oeq), %c(s64), %d
+ %2:_(s1) = G_FREEZE %cmp
+ %ext:_(s64) = G_ZEXT %2(s1)
+ $x0 = COPY %ext(s64)
+ RET_ReallyLR implicit $x0
+...
+---
+name: freeze_zext_fcmp_fold_barrier
+body: |
+ bb.1:
+ liveins: $w0
+
+ ; CHECK-LABEL: name: freeze_zext_fcmp_fold_barrier
+ ; CHECK: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
+ ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+ ; CHECK-NEXT: %d:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+ ; CHECK-NEXT: %cmp:_(s1) = G_FCMP floatpred(oeq), %c(s64), %d
+ ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s1) = G_FREEZE %cmp
+ ; CHECK-NEXT: %ext:_(s64) = G_ZEXT [[FREEZE]](s1)
+ ; CHECK-NEXT: $x0 = COPY %ext(s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %0:_(s64) = COPY $x0
+ %cst:_(s64) = G_CONSTANT i64 9
+ %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+ %d:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+ %cmp:_(s1) = G_FCMP floatpred(oeq), %c(s64), %d
+ %ext:_(s64) = G_ZEXT %cmp(s1)
+ %2:_(s64) = G_FREEZE %ext
+ $x0 = COPY %2(s64)
+ RET_ReallyLR implicit $x0
>From 77e8d53a45b1fdefa0c3f4d12d47c68bb4bb9856 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= <schuett at gmail.com>
Date: Thu, 23 May 2024 23:10:53 +0200
Subject: [PATCH 3/6] more
---
.../include/llvm/Target/GlobalISel/Combine.td | 2 +-
.../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 5 +-
llvm/lib/CodeGen/GlobalISel/Utils.cpp | 2 +-
.../GlobalISel/combine-extract-vec-elt.mir | 4 +-
.../AArch64/GlobalISel/combine-freeze.mir | 90 +---
.../GlobalISel/combine-insert-vec-elt.mir | 6 +-
...galizer-combiner-divrem-insertpt-crash.mir | 3 +-
llvm/test/CodeGen/AArch64/fast-isel-select.ll | 454 ++++++++++++----
llvm/test/CodeGen/AMDGPU/div_i128.ll | 490 ++++++++----------
9 files changed, 592 insertions(+), 464 deletions(-)
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 40a6d69f7e372..47f73daf20891 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1636,7 +1636,7 @@ extract_vector_element_build_vector_trunc5,
extract_vector_element_build_vector_trunc6,
extract_vector_element_build_vector_trunc7,
extract_vector_element_build_vector_trunc8,
-extract_vector_element_freeze,
+//extract_vector_element_freeze,
extract_vector_element_shuffle_vector,
insert_vector_element_extract_vector_element
]>;
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 4cc602b5c8709..abecee4259030 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -265,11 +265,12 @@ bool CombinerHelper::matchFreezeOfSingleMaybePoisonOperand(
}
}
- cast<GenericMachineInstr>(OrigDef)->dropPoisonGeneratingFlags();
+ // FIXME: observer must be aware of dropping
+ // cast<GenericMachineInstr>(OrigDef)->dropPoisonGeneratingFlags();
// Eliminate freeze if all operands are guaranteed non-poison.
if (!MaybePoisonOperand) {
- MatchInfo = [=](MachineIRBuilder &B) { MRI.replaceRegWith(DstOp, OrigOp); };
+ MatchInfo = [=](MachineIRBuilder &B) { B.buildCopy(DstOp, OrigOp); };
return true;
}
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 93a76fde6ab27..e8438be94b3cd 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -1848,7 +1848,7 @@ static bool isGuaranteedNotToBeUndefOrPoison(Register Reg,
return !includesUndef(Kind);
case TargetOpcode::G_CONSTANT:
case TargetOpcode::G_FCONSTANT:
- return false;
+ return true;
case TargetOpcode::G_BUILD_VECTOR: {
GBuildVector *BV = cast<GBuildVector>(RegDef);
unsigned NumSources = BV->getNumSources();
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir
index d5d33742148ad..70241e71aa593 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir
@@ -361,8 +361,8 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %vec:_(<2 x s64>) = COPY $q0
; CHECK-NEXT: %idx:_(s64) = COPY $x1
- ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT %vec(<2 x s64>), %idx(s64)
- ; CHECK-NEXT: %extract:_(s64) = G_FREEZE [[EVEC]]
+ ; CHECK-NEXT: %fvec:_(<2 x s64>) = G_FREEZE %vec
+ ; CHECK-NEXT: %extract:_(s64) = G_EXTRACT_VECTOR_ELT %fvec(<2 x s64>), %idx(s64)
; CHECK-NEXT: $x0 = COPY %extract(s64)
; CHECK-NEXT: RET_ReallyLR implicit $x0
%vec:_(<2 x s64>) = COPY $q0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-freeze.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-freeze.mir
index 10df96bc73ed7..dfa45d96fe94c 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-freeze.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-freeze.mir
@@ -31,8 +31,7 @@ body: |
; CHECK: liveins: $w0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 9
- ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[C]]
- ; CHECK-NEXT: $x0 = COPY [[FREEZE]](s64)
+ ; CHECK-NEXT: $x0 = COPY [[C]](s64)
; CHECK-NEXT: RET_ReallyLR implicit $x0
%0:_(s64) = COPY $x0
%1:_(s64) = G_CONSTANT i64 9
@@ -51,8 +50,7 @@ body: |
; CHECK: liveins: $w0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 9.000000e+00
- ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[C]]
- ; CHECK-NEXT: $x0 = COPY [[FREEZE]](s64)
+ ; CHECK-NEXT: $x0 = COPY [[C]](s64)
; CHECK-NEXT: RET_ReallyLR implicit $x0
%0:_(s64) = COPY $x0
%1:_(s64) = G_FCONSTANT double 9.0
@@ -129,8 +127,7 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %c:_(s32) = G_CONSTANT i32 6
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR %c(s32), %c(s32), %c(s32), %c(s32)
- ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<4 x s32>) = G_FREEZE [[BUILD_VECTOR]]
- ; CHECK-NEXT: $q0 = COPY [[FREEZE]](<4 x s32>)
+ ; CHECK-NEXT: $q0 = COPY [[BUILD_VECTOR]](<4 x s32>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(s32) = COPY $w0
%c:_(s32) = G_CONSTANT i32 6
@@ -149,8 +146,7 @@ body: |
; CHECK: liveins: $w0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
- ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE %cst
- ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER [[FREEZE]]
+ ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
; CHECK-NEXT: $x0 = COPY %c(s64)
; CHECK-NEXT: RET_ReallyLR implicit $x0
%0:_(s64) = COPY $x0
@@ -171,8 +167,7 @@ body: |
; CHECK: liveins: $w0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
- ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE %cst
- ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER [[FREEZE]]
+ ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
; CHECK-NEXT: $x0 = COPY %c(s64)
; CHECK-NEXT: RET_ReallyLR implicit $x0
%0:_(s64) = COPY $x0
@@ -193,10 +188,9 @@ body: |
; CHECK: liveins: $w0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %cst:_(s32) = G_CONSTANT i32 9
- ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s32) = G_FREEZE %cst
- ; CHECK-NEXT: %c:_(s32) = G_CONSTANT_FOLD_BARRIER [[FREEZE]]
- ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT %c(s32)
- ; CHECK-NEXT: $x0 = COPY [[ZEXT]](s64)
+ ; CHECK-NEXT: %c:_(s32) = G_CONSTANT_FOLD_BARRIER %cst
+ ; CHECK-NEXT: %3:_(s64) = nneg G_ZEXT %c(s32)
+ ; CHECK-NEXT: $x0 = COPY %3(s64)
; CHECK-NEXT: RET_ReallyLR implicit $x0
%0:_(s64) = COPY $x0
%cst:_(s32) = G_CONSTANT i32 9
@@ -216,8 +210,7 @@ body: |
; CHECK: liveins: $w0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %cst:_(s32) = G_CONSTANT i32 9
- ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s32) = G_FREEZE %cst
- ; CHECK-NEXT: %c:_(s32) = G_CONSTANT_FOLD_BARRIER [[FREEZE]]
+ ; CHECK-NEXT: %c:_(s32) = G_CONSTANT_FOLD_BARRIER %cst
; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT %c(s32)
; CHECK-NEXT: $x0 = COPY [[ZEXT]](s64)
; CHECK-NEXT: RET_ReallyLR implicit $x0
@@ -241,8 +234,7 @@ body: |
; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
; CHECK-NEXT: [[UDIV:%[0-9]+]]:_(s64) = G_UDIV %c, %c
- ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[UDIV]]
- ; CHECK-NEXT: $x0 = COPY [[FREEZE]](s64)
+ ; CHECK-NEXT: $x0 = COPY [[UDIV]](s64)
; CHECK-NEXT: RET_ReallyLR implicit $x0
%0:_(s64) = COPY $x0
%cst:_(s64) = G_CONSTANT i64 9
@@ -264,8 +256,7 @@ body: |
; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
; CHECK-NEXT: [[UDIV:%[0-9]+]]:_(s64) = exact G_UDIV %c, %c
- ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[UDIV]]
- ; CHECK-NEXT: $x0 = COPY [[FREEZE]](s64)
+ ; CHECK-NEXT: $x0 = COPY [[UDIV]](s64)
; CHECK-NEXT: RET_ReallyLR implicit $x0
%0:_(s64) = COPY $x0
%cst:_(s64) = G_CONSTANT i64 9
@@ -287,8 +278,7 @@ body: |
; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL %c, %c
- ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[MUL]]
- ; CHECK-NEXT: $x0 = COPY [[FREEZE]](s64)
+ ; CHECK-NEXT: $x0 = COPY [[MUL]](s64)
; CHECK-NEXT: RET_ReallyLR implicit $x0
%0:_(s64) = COPY $x0
%cst:_(s64) = G_CONSTANT i64 9
@@ -310,8 +300,7 @@ body: |
; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = nsw G_MUL %c, %c
- ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[MUL]]
- ; CHECK-NEXT: $x0 = COPY [[FREEZE]](s64)
+ ; CHECK-NEXT: $x0 = COPY [[MUL]](s64)
; CHECK-NEXT: RET_ReallyLR implicit $x0
%0:_(s64) = COPY $x0
%cst:_(s64) = G_CONSTANT i64 9
@@ -331,8 +320,7 @@ body: |
; CHECK: liveins: $w0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
- ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE %cst
- ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER [[FREEZE]]
+ ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %c(s64)
; CHECK-NEXT: $w0 = COPY [[TRUNC]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $q0
@@ -354,9 +342,8 @@ body: |
; CHECK: liveins: $w0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
- ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE %cst
- ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER [[FREEZE]]
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %c(s64)
+ ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = nuw G_TRUNC %c(s64)
; CHECK-NEXT: $w0 = COPY [[TRUNC]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(s64) = COPY $x0
@@ -379,8 +366,7 @@ body: |
; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD %c, %c
- ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[ADD]]
- ; CHECK-NEXT: $x0 = COPY [[FREEZE]](s64)
+ ; CHECK-NEXT: $x0 = COPY [[ADD]](s64)
; CHECK-NEXT: RET_ReallyLR implicit $x0
%0:_(s64) = COPY $x0
%cst:_(s64) = G_CONSTANT i64 9
@@ -402,8 +388,7 @@ body: |
; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = nuw G_ADD %c, %c
- ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[ADD]]
- ; CHECK-NEXT: $x0 = COPY [[FREEZE]](s64)
+ ; CHECK-NEXT: $x0 = COPY [[ADD]](s64)
; CHECK-NEXT: RET_ReallyLR implicit $x0
%0:_(s64) = COPY $x0
%cst:_(s64) = G_CONSTANT i64 9
@@ -425,8 +410,7 @@ body: |
; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR %c, %c
- ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[XOR]]
- ; CHECK-NEXT: $x0 = COPY [[FREEZE]](s64)
+ ; CHECK-NEXT: $x0 = COPY [[XOR]](s64)
; CHECK-NEXT: RET_ReallyLR implicit $x0
%0:_(s64) = COPY $x0
%cst:_(s64) = G_CONSTANT i64 9
@@ -515,8 +499,7 @@ body: |
; CHECK: liveins: $w0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
- ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE %cst
- ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER [[FREEZE]]
+ ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
; CHECK-NEXT: [[CTLZ:%[0-9]+]]:_(s64) = G_CTLZ %c(s64)
; CHECK-NEXT: $x0 = COPY [[CTLZ]](s64)
; CHECK-NEXT: RET_ReallyLR implicit $x0
@@ -538,8 +521,7 @@ body: |
; CHECK: liveins: $w0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
- ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE %cst
- ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER [[FREEZE]]
+ ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
; CHECK-NEXT: [[CTTZ:%[0-9]+]]:_(s64) = G_CTTZ %c(s64)
; CHECK-NEXT: $x0 = COPY [[CTTZ]](s64)
; CHECK-NEXT: RET_ReallyLR implicit $x0
@@ -561,8 +543,7 @@ body: |
; CHECK: liveins: $w0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
- ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE %cst
- ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER [[FREEZE]]
+ ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
; CHECK-NEXT: [[BSWAP:%[0-9]+]]:_(s64) = G_BSWAP %c
; CHECK-NEXT: $x0 = COPY [[BSWAP]](s64)
; CHECK-NEXT: RET_ReallyLR implicit $x0
@@ -627,30 +608,3 @@ body: |
%ext:_(s64) = G_ZEXT %2(s1)
$x0 = COPY %ext(s64)
RET_ReallyLR implicit $x0
-...
----
-name: freeze_zext_fcmp_fold_barrier
-body: |
- bb.1:
- liveins: $w0
-
- ; CHECK-LABEL: name: freeze_zext_fcmp_fold_barrier
- ; CHECK: liveins: $w0
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
- ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
- ; CHECK-NEXT: %d:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
- ; CHECK-NEXT: %cmp:_(s1) = G_FCMP floatpred(oeq), %c(s64), %d
- ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s1) = G_FREEZE %cmp
- ; CHECK-NEXT: %ext:_(s64) = G_ZEXT [[FREEZE]](s1)
- ; CHECK-NEXT: $x0 = COPY %ext(s64)
- ; CHECK-NEXT: RET_ReallyLR implicit $x0
- %0:_(s64) = COPY $x0
- %cst:_(s64) = G_CONSTANT i64 9
- %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
- %d:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
- %cmp:_(s1) = G_FCMP floatpred(oeq), %c(s64), %d
- %ext:_(s64) = G_ZEXT %cmp(s1)
- %2:_(s64) = G_FREEZE %ext
- $x0 = COPY %2(s64)
- RET_ReallyLR implicit $x0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-insert-vec-elt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-insert-vec-elt.mir
index 0c67a867580cc..c000a8e635bc6 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-insert-vec-elt.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-insert-vec-elt.mir
@@ -253,10 +253,10 @@ body: |
; CHECK: liveins: $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 127
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s8>) = G_BUILD_VECTOR [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
- ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<32 x s8>) = G_FREEZE [[BUILD_VECTOR]]
- ; CHECK-NEXT: G_STORE [[FREEZE]](<32 x s8>), [[COPY]](p0) :: (store (<32 x s8>))
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s8>) = G_BUILD_VECTOR [[C]](s8), [[C]](s8), [[C]](s8), [[DEF]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8)
+ ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s8>), [[COPY]](p0) :: (store (<32 x s8>))
; CHECK-NEXT: RET_ReallyLR
%3:_(s8) = G_CONSTANT i8 127
%2:_(<32 x s8>) = G_BUILD_VECTOR %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-divrem-insertpt-crash.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-divrem-insertpt-crash.mir
index ca403f8515611..767ece62b8731 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-divrem-insertpt-crash.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-divrem-insertpt-crash.mir
@@ -24,8 +24,7 @@ body: |
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
- ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[C1]]
- ; CHECK-NEXT: [[UDIV:%[0-9]+]]:_(s64) = G_UDIV [[FREEZE]], [[C]]
+ ; CHECK-NEXT: [[UDIV:%[0-9]+]]:_(s64) = G_UDIV [[C1]], [[C]]
; CHECK-NEXT: G_STORE [[UDIV]](s64), [[COPY]](p0) :: (store (s64))
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
diff --git a/llvm/test/CodeGen/AArch64/fast-isel-select.ll b/llvm/test/CodeGen/AArch64/fast-isel-select.ll
index 6ad4a5ae572e0..9184066fc8107 100644
--- a/llvm/test/CodeGen/AArch64/fast-isel-select.ll
+++ b/llvm/test/CodeGen/AArch64/fast-isel-select.ll
@@ -1,175 +1,288 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=aarch64-apple-darwin -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=aarch64-apple-darwin -global-isel -verify-machineinstrs < %s | FileCheck %s --check-prefix=GISEL
; First test the different supported value types for select.
define zeroext i1 @select_i1(i1 zeroext %c, i1 zeroext %a, i1 zeroext %b) {
-; CHECK-LABEL: select_i1
-; CHECK: {{cmp w0, #0|tst w0, #0x1}}
-; CHECK-NEXT: csel {{w[0-9]+}}, w1, w2, ne
+; GISEL-LABEL: select_i1:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: tst w0, #0x1
+; GISEL-NEXT: csel w0, w1, w2, ne
+; GISEL-NEXT: ret
%1 = select i1 %c, i1 %a, i1 %b
ret i1 %1
}
define zeroext i8 @select_i8(i1 zeroext %c, i8 zeroext %a, i8 zeroext %b) {
-; CHECK-LABEL: select_i8
-; CHECK: {{cmp w0, #0|tst w0, #0x1}}
-; CHECK-NEXT: csel {{w[0-9]+}}, w1, w2, ne
+; GISEL-LABEL: select_i8:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: tst w0, #0x1
+; GISEL-NEXT: csel w0, w1, w2, ne
+; GISEL-NEXT: ret
%1 = select i1 %c, i8 %a, i8 %b
ret i8 %1
}
define zeroext i16 @select_i16(i1 zeroext %c, i16 zeroext %a, i16 zeroext %b) {
-; CHECK-LABEL: select_i16
-; CHECK: {{cmp w0, #0|tst w0, #0x1}}
-; CHECK-NEXT: csel {{w[0-9]+}}, w1, w2, ne
+; GISEL-LABEL: select_i16:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: tst w0, #0x1
+; GISEL-NEXT: csel w0, w1, w2, ne
+; GISEL-NEXT: ret
%1 = select i1 %c, i16 %a, i16 %b
ret i16 %1
}
define i32 @select_i32(i1 zeroext %c, i32 %a, i32 %b) {
-; CHECK-LABEL: select_i32
-; CHECK: {{cmp w0, #0|tst w0, #0x1}}
-; CHECK-NEXT: csel {{w[0-9]+}}, w1, w2, ne
+; GISEL-LABEL: select_i32:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: tst w0, #0x1
+; GISEL-NEXT: csel w0, w1, w2, ne
+; GISEL-NEXT: ret
%1 = select i1 %c, i32 %a, i32 %b
ret i32 %1
}
define i64 @select_i64(i1 zeroext %c, i64 %a, i64 %b) {
-; CHECK-LABEL: select_i64
-; CHECK: {{cmp w0, #0|tst w0, #0x1}}
-; CHECK-NEXT: csel {{x[0-9]+}}, x1, x2, ne
+; GISEL-LABEL: select_i64:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: tst w0, #0x1
+; GISEL-NEXT: csel x0, x1, x2, ne
+; GISEL-NEXT: ret
%1 = select i1 %c, i64 %a, i64 %b
ret i64 %1
}
define float @select_f32(i1 zeroext %c, float %a, float %b) {
-; CHECK-LABEL: select_f32
-; CHECK: {{cmp w0, #0|tst w0, #0x1}}
-; CHECK-NEXT: fcsel {{s[0-9]+}}, s0, s1, ne
-; GISEL-LABEL: select_f32
-; GISEL: {{cmp w0, #0|tst w0, #0x1}}
-; GISEL-NEXT: fcsel {{s[0-9]+}}, s0, s1, ne
+; GISEL-LABEL: select_f32:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: tst w0, #0x1
+; GISEL-NEXT: fcsel s0, s0, s1, ne
+; GISEL-NEXT: ret
%1 = select i1 %c, float %a, float %b
ret float %1
}
define double @select_f64(i1 zeroext %c, double %a, double %b) {
-; CHECK-LABEL: select_f64
-; CHECK: {{cmp w0, #0|tst w0, #0x1}}
-; CHECK-NEXT: fcsel {{d[0-9]+}}, d0, d1, ne
-; GISEL-LABEL: select_f64
-; GISEL: {{cmp w0, #0|tst w0, #0x1}}
-; GISEL-NEXT: fcsel {{d[0-9]+}}, d0, d1, ne
+; GISEL-LABEL: select_f64:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: tst w0, #0x1
+; GISEL-NEXT: fcsel d0, d0, d1, ne
+; GISEL-NEXT: ret
%1 = select i1 %c, double %a, double %b
ret double %1
}
; Now test the folding of all compares.
define float @select_fcmp_false(float %x, float %a, float %b) {
-; CHECK-LABEL: select_fcmp_false
-; CHECK: fmov {{s[0-9]+}}, s2
+; CHECK-LABEL: select_fcmp_false:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: fmov s0, s2
+; CHECK-NEXT: ret
+;
+; GISEL-LABEL: select_fcmp_false:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: fcmp s0, s0
+; GISEL-NEXT: fcsel s0, s1, s2, gt
+; GISEL-NEXT: ret
%1 = fcmp ogt float %x, %x
%2 = select i1 %1, float %a, float %b
ret float %2
}
define float @select_fcmp_ogt(float %x, float %y, float %a, float %b) {
-; CHECK-LABEL: select_fcmp_ogt
-; CHECK: fcmp s0, s1
-; CHECK-NEXT: fcsel {{s[0-9]+}}, s2, s3, gt
+; CHECK-LABEL: select_fcmp_ogt:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: fcmp s0, s1
+; CHECK-NEXT: fcsel s0, s2, s3, gt
+; CHECK-NEXT: ret
+;
+; GISEL-LABEL: select_fcmp_ogt:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: fcmp s0, s1
+; GISEL-NEXT: fcsel s0, s2, s3, gt
+; GISEL-NEXT: ret
%1 = fcmp ogt float %x, %y
%2 = select i1 %1, float %a, float %b
ret float %2
}
define float @select_fcmp_oge(float %x, float %y, float %a, float %b) {
-; CHECK-LABEL: select_fcmp_oge
-; CHECK: fcmp s0, s1
-; CHECK-NEXT: fcsel {{s[0-9]+}}, s2, s3, ge
+; CHECK-LABEL: select_fcmp_oge:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: fcmp s0, s1
+; CHECK-NEXT: fcsel s0, s2, s3, ge
+; CHECK-NEXT: ret
+;
+; GISEL-LABEL: select_fcmp_oge:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: fcmp s0, s1
+; GISEL-NEXT: fcsel s0, s2, s3, ge
+; GISEL-NEXT: ret
%1 = fcmp oge float %x, %y
%2 = select i1 %1, float %a, float %b
ret float %2
}
define float @select_fcmp_olt(float %x, float %y, float %a, float %b) {
-; CHECK-LABEL: select_fcmp_olt
-; CHECK: fcmp s0, s1
-; CHECK-NEXT: fcsel {{s[0-9]+}}, s2, s3, mi
+; CHECK-LABEL: select_fcmp_olt:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: fcmp s0, s1
+; CHECK-NEXT: fcsel s0, s2, s3, mi
+; CHECK-NEXT: ret
+;
+; GISEL-LABEL: select_fcmp_olt:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: fcmp s0, s1
+; GISEL-NEXT: fcsel s0, s2, s3, mi
+; GISEL-NEXT: ret
%1 = fcmp olt float %x, %y
%2 = select i1 %1, float %a, float %b
ret float %2
}
define float @select_fcmp_ole(float %x, float %y, float %a, float %b) {
-; CHECK-LABEL: select_fcmp_ole
-; CHECK: fcmp s0, s1
-; CHECK-NEXT: fcsel {{s[0-9]+}}, s2, s3, ls
+; CHECK-LABEL: select_fcmp_ole:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: fcmp s0, s1
+; CHECK-NEXT: fcsel s0, s2, s3, ls
+; CHECK-NEXT: ret
+;
+; GISEL-LABEL: select_fcmp_ole:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: fcmp s0, s1
+; GISEL-NEXT: fcsel s0, s2, s3, ls
+; GISEL-NEXT: ret
%1 = fcmp ole float %x, %y
%2 = select i1 %1, float %a, float %b
ret float %2
}
define float @select_fcmp_one(float %x, float %y, float %a, float %b) {
-; CHECK-LABEL: select_fcmp_one
-; CHECK: fcmp s0, s1
-; CHECK-NEXT: fcsel [[REG:s[0-9]+]], s2, s3, mi
-; CHECK-NEXT: fcsel {{s[0-9]+}}, s2, [[REG]], gt
+; CHECK-LABEL: select_fcmp_one:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: fcmp s0, s1
+; CHECK-NEXT: fcsel s0, s2, s3, mi
+; CHECK-NEXT: fcsel s0, s2, s0, gt
+; CHECK-NEXT: ret
+;
+; GISEL-LABEL: select_fcmp_one:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: fcmp s0, s1
+; GISEL-NEXT: cset w8, mi
+; GISEL-NEXT: cset w9, gt
+; GISEL-NEXT: orr w8, w8, w9
+; GISEL-NEXT: tst w8, #0x1
+; GISEL-NEXT: fcsel s0, s2, s3, ne
+; GISEL-NEXT: ret
%1 = fcmp one float %x, %y
%2 = select i1 %1, float %a, float %b
ret float %2
}
define float @select_fcmp_ord(float %x, float %y, float %a, float %b) {
-; CHECK-LABEL: select_fcmp_ord
-; CHECK: fcmp s0, s1
-; CHECK-NEXT: fcsel {{s[0-9]+}}, s2, s3, vc
+; CHECK-LABEL: select_fcmp_ord:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: fcmp s0, s1
+; CHECK-NEXT: fcsel s0, s2, s3, vc
+; CHECK-NEXT: ret
+;
+; GISEL-LABEL: select_fcmp_ord:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: fcmp s0, s1
+; GISEL-NEXT: fcsel s0, s2, s3, vc
+; GISEL-NEXT: ret
%1 = fcmp ord float %x, %y
%2 = select i1 %1, float %a, float %b
ret float %2
}
define float @select_fcmp_uno(float %x, float %y, float %a, float %b) {
-; CHECK-LABEL: select_fcmp_uno
-; CHECK: fcmp s0, s1
-; CHECK-NEXT: fcsel {{s[0-9]+}}, s2, s3, vs
+; CHECK-LABEL: select_fcmp_uno:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: fcmp s0, s1
+; CHECK-NEXT: fcsel s0, s2, s3, vs
+; CHECK-NEXT: ret
+;
+; GISEL-LABEL: select_fcmp_uno:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: fcmp s0, s1
+; GISEL-NEXT: fcsel s0, s2, s3, vs
+; GISEL-NEXT: ret
%1 = fcmp uno float %x, %y
%2 = select i1 %1, float %a, float %b
ret float %2
}
define float @select_fcmp_ueq(float %x, float %y, float %a, float %b) {
-; CHECK-LABEL: select_fcmp_ueq
-; CHECK: fcmp s0, s1
-; CHECK-NEXT: fcsel [[REG:s[0-9]+]], s2, s3, eq
-; CHECK-NEXT: fcsel {{s[0-9]+}}, s2, [[REG]], vs
+; CHECK-LABEL: select_fcmp_ueq:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: fcmp s0, s1
+; CHECK-NEXT: fcsel s0, s2, s3, eq
+; CHECK-NEXT: fcsel s0, s2, s0, vs
+; CHECK-NEXT: ret
+;
+; GISEL-LABEL: select_fcmp_ueq:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: fcmp s0, s1
+; GISEL-NEXT: cset w8, eq
+; GISEL-NEXT: cset w9, vs
+; GISEL-NEXT: orr w8, w8, w9
+; GISEL-NEXT: tst w8, #0x1
+; GISEL-NEXT: fcsel s0, s2, s3, ne
+; GISEL-NEXT: ret
%1 = fcmp ueq float %x, %y
%2 = select i1 %1, float %a, float %b
ret float %2
}
define float @select_fcmp_ugt(float %x, float %y, float %a, float %b) {
-; CHECK-LABEL: select_fcmp_ugt
-; CHECK: fcmp s0, s1
-; CHECK-NEXT: fcsel {{s[0-9]+}}, s2, s3, hi
+; CHECK-LABEL: select_fcmp_ugt:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: fcmp s0, s1
+; CHECK-NEXT: fcsel s0, s2, s3, hi
+; CHECK-NEXT: ret
+;
+; GISEL-LABEL: select_fcmp_ugt:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: fcmp s0, s1
+; GISEL-NEXT: fcsel s0, s2, s3, hi
+; GISEL-NEXT: ret
%1 = fcmp ugt float %x, %y
%2 = select i1 %1, float %a, float %b
ret float %2
}
define float @select_fcmp_uge(float %x, float %y, float %a, float %b) {
-; CHECK-LABEL: select_fcmp_uge
-; CHECK: fcmp s0, s1
-; CHECK-NEXT: fcsel {{s[0-9]+}}, s2, s3, pl
+; CHECK-LABEL: select_fcmp_uge:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: fcmp s0, s1
+; CHECK-NEXT: fcsel s0, s2, s3, pl
+; CHECK-NEXT: ret
+;
+; GISEL-LABEL: select_fcmp_uge:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: fcmp s0, s1
+; GISEL-NEXT: fcsel s0, s2, s3, pl
+; GISEL-NEXT: ret
%1 = fcmp uge float %x, %y
%2 = select i1 %1, float %a, float %b
ret float %2
}
define float @select_fcmp_ult(float %x, float %y, float %a, float %b) {
-; CHECK-LABEL: select_fcmp_ult
-; CHECK: fcmp s0, s1
-; CHECK-NEXT: fcsel {{s[0-9]+}}, s2, s3, lt
+; CHECK-LABEL: select_fcmp_ult:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: fcmp s0, s1
+; CHECK-NEXT: fcsel s0, s2, s3, lt
+; CHECK-NEXT: ret
+;
+; GISEL-LABEL: select_fcmp_ult:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: fcmp s0, s1
+; GISEL-NEXT: fcsel s0, s2, s3, lt
+; GISEL-NEXT: ret
%1 = fcmp ult float %x, %y
%2 = select i1 %1, float %a, float %b
ret float %2
@@ -177,116 +290,224 @@ define float @select_fcmp_ult(float %x, float %y, float %a, float %b) {
define float @select_fcmp_ule(float %x, float %y, float %a, float %b) {
-; CHECK-LABEL: select_fcmp_ule
-; CHECK: fcmp s0, s1
-; CHECK-NEXT: fcsel {{s[0-9]+}}, s2, s3, le
+; CHECK-LABEL: select_fcmp_ule:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: fcmp s0, s1
+; CHECK-NEXT: fcsel s0, s2, s3, le
+; CHECK-NEXT: ret
+;
+; GISEL-LABEL: select_fcmp_ule:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: fcmp s0, s1
+; GISEL-NEXT: fcsel s0, s2, s3, le
+; GISEL-NEXT: ret
%1 = fcmp ule float %x, %y
%2 = select i1 %1, float %a, float %b
ret float %2
}
define float @select_fcmp_une(float %x, float %y, float %a, float %b) {
-; CHECK-LABEL: select_fcmp_une
-; CHECK: fcmp s0, s1
-; CHECK-NEXT: fcsel {{s[0-9]+}}, s2, s3, ne
+; CHECK-LABEL: select_fcmp_une:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: fcmp s0, s1
+; CHECK-NEXT: fcsel s0, s2, s3, ne
+; CHECK-NEXT: ret
+;
+; GISEL-LABEL: select_fcmp_une:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: fcmp s0, s1
+; GISEL-NEXT: fcsel s0, s2, s3, ne
+; GISEL-NEXT: ret
%1 = fcmp une float %x, %y
%2 = select i1 %1, float %a, float %b
ret float %2
}
define float @select_fcmp_true(float %x, float %a, float %b) {
-; CHECK-LABEL: select_fcmp_true
-; CHECK: fmov {{s[0-9]+}}, s1
+; CHECK-LABEL: select_fcmp_true:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: fmov s0, s1
+; CHECK-NEXT: ret
+;
+; GISEL-LABEL: select_fcmp_true:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: fcmp s0, s0
+; GISEL-NEXT: cset w8, eq
+; GISEL-NEXT: cset w9, vs
+; GISEL-NEXT: orr w8, w8, w9
+; GISEL-NEXT: tst w8, #0x1
+; GISEL-NEXT: fcsel s0, s1, s2, ne
+; GISEL-NEXT: ret
%1 = fcmp ueq float %x, %x
%2 = select i1 %1, float %a, float %b
ret float %2
}
define float @select_icmp_eq(i32 %x, i32 %y, float %a, float %b) {
-; CHECK-LABEL: select_icmp_eq
-; CHECK: cmp w0, w1
-; CHECK-NEXT: fcsel {{s[0-9]+}}, s0, s1, eq
+; CHECK-LABEL: select_icmp_eq:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: cmp w0, w1
+; CHECK-NEXT: fcsel s0, s0, s1, eq
+; CHECK-NEXT: ret
+;
+; GISEL-LABEL: select_icmp_eq:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: cmp w0, w1
+; GISEL-NEXT: fcsel s0, s0, s1, eq
+; GISEL-NEXT: ret
%1 = icmp eq i32 %x, %y
%2 = select i1 %1, float %a, float %b
ret float %2
}
define float @select_icmp_ne(i32 %x, i32 %y, float %a, float %b) {
-; CHECK-LABEL: select_icmp_ne
-; CHECK: cmp w0, w1
-; CHECK-NEXT: fcsel {{s[0-9]+}}, s0, s1, ne
+; CHECK-LABEL: select_icmp_ne:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: cmp w0, w1
+; CHECK-NEXT: fcsel s0, s0, s1, ne
+; CHECK-NEXT: ret
+;
+; GISEL-LABEL: select_icmp_ne:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: cmp w0, w1
+; GISEL-NEXT: fcsel s0, s0, s1, ne
+; GISEL-NEXT: ret
%1 = icmp ne i32 %x, %y
%2 = select i1 %1, float %a, float %b
ret float %2
}
define float @select_icmp_ugt(i32 %x, i32 %y, float %a, float %b) {
-; CHECK-LABEL: select_icmp_ugt
-; CHECK: cmp w0, w1
-; CHECK-NEXT: fcsel {{s[0-9]+}}, s0, s1, hi
+; CHECK-LABEL: select_icmp_ugt:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: cmp w0, w1
+; CHECK-NEXT: fcsel s0, s0, s1, hi
+; CHECK-NEXT: ret
+;
+; GISEL-LABEL: select_icmp_ugt:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: cmp w0, w1
+; GISEL-NEXT: fcsel s0, s0, s1, hi
+; GISEL-NEXT: ret
%1 = icmp ugt i32 %x, %y
%2 = select i1 %1, float %a, float %b
ret float %2
}
define float @select_icmp_uge(i32 %x, i32 %y, float %a, float %b) {
-; CHECK-LABEL: select_icmp_uge
-; CHECK: cmp w0, w1
-; CHECK-NEXT: fcsel {{s[0-9]+}}, s0, s1, hs
+; CHECK-LABEL: select_icmp_uge:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: cmp w0, w1
+; CHECK-NEXT: fcsel s0, s0, s1, hs
+; CHECK-NEXT: ret
+;
+; GISEL-LABEL: select_icmp_uge:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: cmp w0, w1
+; GISEL-NEXT: fcsel s0, s0, s1, hs
+; GISEL-NEXT: ret
%1 = icmp uge i32 %x, %y
%2 = select i1 %1, float %a, float %b
ret float %2
}
define float @select_icmp_ult(i32 %x, i32 %y, float %a, float %b) {
-; CHECK-LABEL: select_icmp_ult
-; CHECK: cmp w0, w1
-; CHECK-NEXT: fcsel {{s[0-9]+}}, s0, s1, lo
+; CHECK-LABEL: select_icmp_ult:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: cmp w0, w1
+; CHECK-NEXT: fcsel s0, s0, s1, lo
+; CHECK-NEXT: ret
+;
+; GISEL-LABEL: select_icmp_ult:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: cmp w0, w1
+; GISEL-NEXT: fcsel s0, s0, s1, lo
+; GISEL-NEXT: ret
%1 = icmp ult i32 %x, %y
%2 = select i1 %1, float %a, float %b
ret float %2
}
define float @select_icmp_ule(i32 %x, i32 %y, float %a, float %b) {
-; CHECK-LABEL: select_icmp_ule
-; CHECK: cmp w0, w1
-; CHECK-NEXT: fcsel {{s[0-9]+}}, s0, s1, ls
+; CHECK-LABEL: select_icmp_ule:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: cmp w0, w1
+; CHECK-NEXT: fcsel s0, s0, s1, ls
+; CHECK-NEXT: ret
+;
+; GISEL-LABEL: select_icmp_ule:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: cmp w0, w1
+; GISEL-NEXT: fcsel s0, s0, s1, ls
+; GISEL-NEXT: ret
%1 = icmp ule i32 %x, %y
%2 = select i1 %1, float %a, float %b
ret float %2
}
define float @select_icmp_sgt(i32 %x, i32 %y, float %a, float %b) {
-; CHECK-LABEL: select_icmp_sgt
-; CHECK: cmp w0, w1
-; CHECK-NEXT: fcsel {{s[0-9]+}}, s0, s1, gt
+; CHECK-LABEL: select_icmp_sgt:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: cmp w0, w1
+; CHECK-NEXT: fcsel s0, s0, s1, gt
+; CHECK-NEXT: ret
+;
+; GISEL-LABEL: select_icmp_sgt:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: cmp w0, w1
+; GISEL-NEXT: fcsel s0, s0, s1, gt
+; GISEL-NEXT: ret
%1 = icmp sgt i32 %x, %y
%2 = select i1 %1, float %a, float %b
ret float %2
}
define float @select_icmp_sge(i32 %x, i32 %y, float %a, float %b) {
-; CHECK-LABEL: select_icmp_sge
-; CHECK: cmp w0, w1
-; CHECK-NEXT: fcsel {{s[0-9]+}}, s0, s1, ge
+; CHECK-LABEL: select_icmp_sge:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: cmp w0, w1
+; CHECK-NEXT: fcsel s0, s0, s1, ge
+; CHECK-NEXT: ret
+;
+; GISEL-LABEL: select_icmp_sge:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: cmp w0, w1
+; GISEL-NEXT: fcsel s0, s0, s1, ge
+; GISEL-NEXT: ret
%1 = icmp sge i32 %x, %y
%2 = select i1 %1, float %a, float %b
ret float %2
}
define float @select_icmp_slt(i32 %x, i32 %y, float %a, float %b) {
-; CHECK-LABEL: select_icmp_slt
-; CHECK: cmp w0, w1
-; CHECK-NEXT: fcsel {{s[0-9]+}}, s0, s1, lt
+; CHECK-LABEL: select_icmp_slt:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: cmp w0, w1
+; CHECK-NEXT: fcsel s0, s0, s1, lt
+; CHECK-NEXT: ret
+;
+; GISEL-LABEL: select_icmp_slt:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: cmp w0, w1
+; GISEL-NEXT: fcsel s0, s0, s1, lt
+; GISEL-NEXT: ret
%1 = icmp slt i32 %x, %y
%2 = select i1 %1, float %a, float %b
ret float %2
}
define float @select_icmp_sle(i32 %x, i32 %y, float %a, float %b) {
-; CHECK-LABEL: select_icmp_sle
-; CHECK: cmp w0, w1
-; CHECK-NEXT: fcsel {{s[0-9]+}}, s0, s1, le
+; CHECK-LABEL: select_icmp_sle:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: cmp w0, w1
+; CHECK-NEXT: fcsel s0, s0, s1, le
+; CHECK-NEXT: ret
+;
+; GISEL-LABEL: select_icmp_sle:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: cmp w0, w1
+; GISEL-NEXT: fcsel s0, s0, s1, le
+; GISEL-NEXT: ret
%1 = icmp sle i32 %x, %y
%2 = select i1 %1, float %a, float %b
ret float %2
@@ -294,30 +515,41 @@ define float @select_icmp_sle(i32 %x, i32 %y, float %a, float %b) {
; Test peephole optimizations for select.
define zeroext i1 @select_opt1(i1 zeroext %c, i1 zeroext %a) {
-; CHECK-LABEL: select_opt1
-; CHECK: orr {{w[0-9]+}}, w0, w1
+; GISEL-LABEL: select_opt1:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: orr w8, w0, w1
+; GISEL-NEXT: and w0, w8, #0x1
+; GISEL-NEXT: ret
%1 = select i1 %c, i1 true, i1 %a
ret i1 %1
}
define zeroext i1 @select_opt2(i1 zeroext %c, i1 zeroext %a) {
-; CHECK-LABEL: select_opt2
-; CHECK: eor [[REG:w[0-9]+]], w0, #0x1
-; CHECK: orr {{w[0-9]+}}, [[REG]], w1
+; GISEL-LABEL: select_opt2:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: eor w8, w0, #0x1
+; GISEL-NEXT: orr w8, w8, w1
+; GISEL-NEXT: and w0, w8, #0x1
+; GISEL-NEXT: ret
%1 = select i1 %c, i1 %a, i1 true
ret i1 %1
}
define zeroext i1 @select_opt3(i1 zeroext %c, i1 zeroext %a) {
-; CHECK-LABEL: select_opt3
-; CHECK: bic {{w[0-9]+}}, w1, w0
+; GISEL-LABEL: select_opt3:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: eor w8, w0, #0x1
+; GISEL-NEXT: and w0, w8, w1
+; GISEL-NEXT: ret
%1 = select i1 %c, i1 false, i1 %a
ret i1 %1
}
define zeroext i1 @select_opt4(i1 zeroext %c, i1 zeroext %a) {
-; CHECK-LABEL: select_opt4
-; CHECK: and {{w[0-9]+}}, w0, w1
+; GISEL-LABEL: select_opt4:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: and w0, w0, w1
+; GISEL-NEXT: ret
%1 = select i1 %c, i1 %a, i1 false
ret i1 %1
}
diff --git a/llvm/test/CodeGen/AMDGPU/div_i128.ll b/llvm/test/CodeGen/AMDGPU/div_i128.ll
index b2f9bf89d9ec6..7d8eba1e87080 100644
--- a/llvm/test/CodeGen/AMDGPU/div_i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/div_i128.ll
@@ -1509,49 +1509,39 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v2
; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v3
; GFX9-G-O0-NEXT: v_xor_b32_e64 v1, v12, v1
-; GFX9-G-O0-NEXT: v_xor_b32_e64 v4, v12, v2
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v5
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v6
+; GFX9-G-O0-NEXT: v_xor_b32_e64 v2, v12, v2
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v5
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v6
+; GFX9-G-O0-NEXT: v_xor_b32_e64 v4, v10, v4
; GFX9-G-O0-NEXT: v_xor_b32_e64 v3, v10, v3
-; GFX9-G-O0-NEXT: v_xor_b32_e64 v2, v10, v2
; GFX9-G-O0-NEXT: v_sub_co_u32_e64 v1, s[6:7], v1, v12
-; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v7, s[6:7], v4, v12, s[6:7]
-; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v6, s[6:7], v3, v10, s[6:7]
-; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v5, s[6:7], v2, v10, s[6:7]
-; GFX9-G-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2_vgpr3_vgpr4 killed $exec
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v7
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v6
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v5
-; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0)
-; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v13
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v14
+; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v2, s[6:7], v2, v12, s[6:7]
+; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v6, s[6:7], v4, v10, s[6:7]
+; GFX9-G-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v3, s[6:7], v3, v10, s[6:7]
+; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v13
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v14
; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v15
; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v16
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v6
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v7
-; GFX9-G-O0-NEXT: v_xor_b32_e64 v5, v11, v5
-; GFX9-G-O0-NEXT: v_xor_b32_e64 v8, v11, v6
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v7
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v8
+; GFX9-G-O0-NEXT: v_xor_b32_e64 v8, v11, v5
+; GFX9-G-O0-NEXT: v_xor_b32_e64 v5, v11, v4
; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v13
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v14
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v14
; GFX9-G-O0-NEXT: v_xor_b32_e64 v7, v9, v7
-; GFX9-G-O0-NEXT: v_xor_b32_e64 v6, v9, v6
-; GFX9-G-O0-NEXT: v_sub_co_u32_e64 v5, s[6:7], v5, v11
-; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v15, s[6:7], v8, v11, s[6:7]
-; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v14, s[6:7], v7, v9, s[6:7]
-; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v13, s[6:7], v6, v9, s[6:7]
-; GFX9-G-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6_vgpr7_vgpr8 killed $exec
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v15
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v14
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v13
-; GFX9-G-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0)
-; GFX9-G-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: v_xor_b32_e64 v4, v9, v4
+; GFX9-G-O0-NEXT: v_sub_co_u32_e64 v8, s[6:7], v8, v11
; GFX9-G-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v5, s[6:7], v5, v11, s[6:7]
+; GFX9-G-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v7, s[6:7], v7, v9, s[6:7]
+; GFX9-G-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v4, s[6:7], v4, v9, s[6:7]
+; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: v_xor_b32_e64 v13, v11, v12
; GFX9-G-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: v_xor_b32_e64 v11, v11, v12
@@ -1560,97 +1550,69 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-G-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: v_xor_b32_e64 v9, v9, v10
; GFX9-G-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v6
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v5
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v8
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v7
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v10
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v11
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v13
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v14
-; GFX9-G-O0-NEXT: v_or_b32_e64 v9, v9, v12
-; GFX9-G-O0-NEXT: v_or_b32_e64 v11, v10, v11
+; GFX9-G-O0-NEXT: v_or_b32_e64 v9, v8, v7
+; GFX9-G-O0-NEXT: v_or_b32_e64 v11, v5, v4
; GFX9-G-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v11
; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, s5
; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, s4
; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[6:7], v[9:10], v[11:12]
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v2
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v1
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v4
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v3
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v10
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v11
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v13
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v14
-; GFX9-G-O0-NEXT: v_or_b32_e64 v9, v9, v12
-; GFX9-G-O0-NEXT: v_or_b32_e64 v11, v10, v11
+; GFX9-G-O0-NEXT: v_or_b32_e64 v9, v1, v6
+; GFX9-G-O0-NEXT: v_or_b32_e64 v11, v2, v3
; GFX9-G-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v11
; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, s5
; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, s4
; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[9:10], v[11:12]
; GFX9-G-O0-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9]
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v6
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v5
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v8
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v7
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, s5
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, s4
-; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[8:9], v[5:6]
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v10
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v11
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v7
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v4
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, s5
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, s4
+; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[9:10], v[11:12]
; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v5, v5
-; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v6, v6
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, 32
-; GFX9-G-O0-NEXT: v_add_u32_e64 v6, v6, v7
-; GFX9-G-O0-NEXT: v_min_u32_e64 v5, v5, v6
+; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v8, v8
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, 32
+; GFX9-G-O0-NEXT: v_add_u32_e64 v8, v8, v9
+; GFX9-G-O0-NEXT: v_min_u32_e64 v5, v5, v8
; GFX9-G-O0-NEXT: s_mov_b32 s10, 64
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, s10
-; GFX9-G-O0-NEXT: v_add_u32_e64 v6, v5, v6
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v8
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v9
-; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v5, v5
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, s10
+; GFX9-G-O0-NEXT: v_add_u32_e64 v5, v5, v8
+; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v4, v4
; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v7, v7
; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, 32
; GFX9-G-O0-NEXT: v_add_u32_e64 v7, v7, v8
-; GFX9-G-O0-NEXT: v_min_u32_e64 v5, v5, v7
-; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[8:9]
+; GFX9-G-O0-NEXT: v_min_u32_e64 v4, v4, v7
+; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v4, v4, v5, s[8:9]
; GFX9-G-O0-NEXT: s_mov_b32 s16, 0
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v2
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v1
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v4
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v3
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, s5
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, s4
-; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[9:10], v[6:7]
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v11
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v12
-; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v6, v6
-; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v7, v7
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v6
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v3
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, s5
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, s4
+; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[7:8], v[9:10]
+; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v5, v2
+; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v7, v1
; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, 32
; GFX9-G-O0-NEXT: v_add_u32_e64 v7, v7, v8
-; GFX9-G-O0-NEXT: v_min_u32_e64 v6, v6, v7
+; GFX9-G-O0-NEXT: v_min_u32_e64 v5, v5, v7
; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, s10
-; GFX9-G-O0-NEXT: v_add_u32_e64 v7, v6, v7
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v9
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v10
-; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v6, v6
-; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v8, v8
+; GFX9-G-O0-NEXT: v_add_u32_e64 v7, v5, v7
+; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v5, v3
+; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v8, v6
; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, 32
; GFX9-G-O0-NEXT: v_add_u32_e64 v8, v8, v9
-; GFX9-G-O0-NEXT: v_min_u32_e64 v6, v6, v8
-; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[8:9]
+; GFX9-G-O0-NEXT: v_min_u32_e64 v5, v5, v8
+; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v5, v5, v7, s[8:9]
; GFX9-G-O0-NEXT: s_mov_b32 s15, 0
; GFX9-G-O0-NEXT: s_mov_b32 s11, 0
; GFX9-G-O0-NEXT: s_mov_b32 s14, 0
; GFX9-G-O0-NEXT: s_mov_b32 s10, 0
-; GFX9-G-O0-NEXT: v_sub_co_u32_e64 v6, s[8:9], v5, v6
-; GFX9-G-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: v_sub_co_u32_e64 v7, s[8:9], v4, v5
+; GFX9-G-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, s16
; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, s16
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, s16
-; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v7, s[8:9], v5, v7, s[8:9]
-; GFX9-G-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v4, s[8:9], v4, v5, s[8:9]
+; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, s15
; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, s14
; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v9, s[8:9], v5, v8, s[8:9]
@@ -1659,8 +1621,8 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, s10
; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v8, s[8:9], v5, v8, s[8:9]
; GFX9-G-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v6
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v7
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v7
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v4
; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v9
; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v8
; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, s5
@@ -1685,35 +1647,27 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-G-O0-NEXT: v_or_b32_e64 v5, v5, v10
; GFX9-G-O0-NEXT: s_mov_b32 s7, 0x7f
; GFX9-G-O0-NEXT: s_mov_b32 s6, 0
-; GFX9-G-O0-NEXT: v_xor_b32_e64 v6, v6, s7
-; GFX9-G-O0-NEXT: v_xor_b32_e64 v7, v7, s6
-; GFX9-G-O0-NEXT: v_or_b32_e64 v6, v6, v9
-; GFX9-G-O0-NEXT: v_or_b32_e64 v8, v7, v8
-; GFX9-G-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v8
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, s5
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, s4
-; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], v[6:7], v[8:9]
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v2
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v1
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v4
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v3
-; GFX9-G-O0-NEXT: v_and_b32_e32 v1, 1, v5
-; GFX9-G-O0-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v1
+; GFX9-G-O0-NEXT: v_xor_b32_e64 v7, v7, s7
+; GFX9-G-O0-NEXT: v_xor_b32_e64 v4, v4, s6
+; GFX9-G-O0-NEXT: v_or_b32_e64 v7, v7, v9
+; GFX9-G-O0-NEXT: v_or_b32_e64 v4, v4, v8
+; GFX9-G-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v4
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, s5
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, s4
+; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], v[7:8], v[9:10]
+; GFX9-G-O0-NEXT: v_and_b32_e32 v4, 1, v5
+; GFX9-G-O0-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v4
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, 0
; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, 0
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, 0
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v6
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v7
-; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[6:7]
-; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v3, v2, v3, s[6:7]
+; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[6:7]
+; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v4, v2, v4, s[6:7]
; GFX9-G-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v3
-; GFX9-G-O0-NEXT: v_and_b32_e32 v3, 1, v5
-; GFX9-G-O0-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v3
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v4
+; GFX9-G-O0-NEXT: v_and_b32_e32 v4, 1, v5
+; GFX9-G-O0-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v4
; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, 0
; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, 0
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v8
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v9
; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[6:7]
; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[6:7]
; GFX9-G-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
@@ -1883,10 +1837,10 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0)
; GFX9-G-O0-NEXT: v_readlane_b32 s6, v16, 6
; GFX9-G-O0-NEXT: v_readlane_b32 s7, v16, 7
-; GFX9-G-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:212 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:220 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:224 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v23, off, s[0:3], s32 offset:212 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v24, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v25, off, s[0:3], s32 offset:220 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v26, off, s[0:3], s32 offset:224 ; 4-byte Folded Reload
; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:228 ; 4-byte Folded Reload
; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:232 ; 4-byte Folded Reload
; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:236 ; 4-byte Folded Reload
@@ -1899,14 +1853,14 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-G-O0-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload
; GFX9-G-O0-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload
; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v25, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v26, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v27, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v28, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:276 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:280 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:284 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:288 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:280 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:284 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:288 ; 4-byte Folded Reload
; GFX9-G-O0-NEXT: s_mov_b64 s[4:5], 0
; GFX9-G-O0-NEXT: s_waitcnt vmcnt(16)
; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v2
@@ -1915,7 +1869,7 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v5
; GFX9-G-O0-NEXT: s_mov_b32 s8, 1
; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, s8
-; GFX9-G-O0-NEXT: v_lshlrev_b64 v[21:22], v2, v[0:1]
+; GFX9-G-O0-NEXT: v_lshlrev_b64 v[14:15], v2, v[0:1]
; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, s8
; GFX9-G-O0-NEXT: v_lshlrev_b64 v[4:5], v2, v[3:4]
; GFX9-G-O0-NEXT: ; kill: def $vgpr2 killed $vgpr0 killed $exec
@@ -1929,9 +1883,9 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v5
; GFX9-G-O0-NEXT: v_or_b32_e64 v7, v2, v3
; GFX9-G-O0-NEXT: v_or_b32_e64 v5, v0, v1
-; GFX9-G-O0-NEXT: ; kill: def $vgpr0_vgpr1 killed $vgpr12_vgpr13 killed $exec
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v14
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v15
+; GFX9-G-O0-NEXT: ; kill: def $vgpr0_vgpr1 killed $vgpr23_vgpr24 killed $exec
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v25
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v26
; GFX9-G-O0-NEXT: ; kill: def $vgpr2 killed $vgpr0 killed $exec
; GFX9-G-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 killed $vgpr0_vgpr1 killed $exec
; GFX9-G-O0-NEXT: s_mov_b32 s9, 31
@@ -1939,81 +1893,73 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-G-O0-NEXT: v_lshrrev_b32_e64 v3, v0, v1
; GFX9-G-O0-NEXT: s_mov_b32 s9, 0
; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, s9
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v21
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v22
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v14
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v15
; GFX9-G-O0-NEXT: v_or_b32_e64 v4, v2, v3
; GFX9-G-O0-NEXT: v_or_b32_e64 v9, v0, v1
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v12
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v13
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v14
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v15
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v23
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v24
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v25
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v26
; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s8
-; GFX9-G-O0-NEXT: v_lshlrev_b64 v[23:24], v0, v[2:3]
+; GFX9-G-O0-NEXT: v_lshlrev_b64 v[27:28], v0, v[2:3]
; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s8
-; GFX9-G-O0-NEXT: v_lshlrev_b64 v[0:1], v0, v[12:13]
-; GFX9-G-O0-NEXT: ; kill: def $vgpr12 killed $vgpr2 killed $exec
+; GFX9-G-O0-NEXT: v_lshlrev_b64 v[0:1], v0, v[14:15]
+; GFX9-G-O0-NEXT: ; kill: def $vgpr14 killed $vgpr2 killed $exec
; GFX9-G-O0-NEXT: ; kill: def $vgpr3 killed $vgpr3 killed $vgpr2_vgpr3 killed $exec
; GFX9-G-O0-NEXT: s_mov_b32 s8, 31
; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, s8
-; GFX9-G-O0-NEXT: v_lshrrev_b32_e64 v14, v2, v3
+; GFX9-G-O0-NEXT: v_lshrrev_b32_e64 v23, v2, v3
; GFX9-G-O0-NEXT: s_mov_b32 s8, 0
; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, s8
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v0
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v0
; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v1
; GFX9-G-O0-NEXT: s_waitcnt vmcnt(8)
; GFX9-G-O0-NEXT: v_mov_b32_e32 v29, v31
; GFX9-G-O0-NEXT: v_mov_b32_e32 v30, v32
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v21, v33
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v22, v34
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v25, v33
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v26, v34
; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v29
; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v30
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v23
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v24
-; GFX9-G-O0-NEXT: v_or_b32_e64 v0, v0, v15
-; GFX9-G-O0-NEXT: v_or_b32_e64 v13, v1, v13
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v24, v27
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v28
+; GFX9-G-O0-NEXT: v_or_b32_e64 v0, v0, v24
+; GFX9-G-O0-NEXT: v_or_b32_e64 v15, v1, v15
; GFX9-G-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v13
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v21
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v22
-; GFX9-G-O0-NEXT: v_or3_b32 v12, v12, v14, v15
-; GFX9-G-O0-NEXT: v_or3_b32 v2, v2, v3, v13
-; GFX9-G-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v2
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v15
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v24, v25
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v26
+; GFX9-G-O0-NEXT: v_or3_b32 v14, v14, v23, v24
+; GFX9-G-O0-NEXT: v_or3_b32 v2, v2, v3, v15
+; GFX9-G-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v2
; GFX9-G-O0-NEXT: ; kill: def $vgpr0_vgpr1 killed $vgpr0_vgpr1 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v12
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v13
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v14
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v15
; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0)
-; GFX9-G-O0-NEXT: v_sub_co_u32_e64 v11, s[8:9], v11, v4
-; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v10, s[8:9], v10, v9, s[8:9]
-; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v8, s[8:9], v8, v7, s[8:9]
-; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v10, s[8:9], v6, v5, s[8:9]
+; GFX9-G-O0-NEXT: v_sub_co_u32_e64 v13, s[8:9], v13, v4
+; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v12, s[8:9], v12, v9, s[8:9]
+; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v10, s[8:9], v10, v7, s[8:9]
+; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v12, s[8:9], v6, v5, s[8:9]
; GFX9-G-O0-NEXT: s_mov_b32 s8, 31
; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, s8
-; GFX9-G-O0-NEXT: v_ashrrev_i32_e64 v8, v6, v10
+; GFX9-G-O0-NEXT: v_ashrrev_i32_e64 v10, v6, v12
; GFX9-G-O0-NEXT: s_mov_b32 s8, 31
; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, s8
-; GFX9-G-O0-NEXT: v_ashrrev_i32_e64 v6, v6, v10
+; GFX9-G-O0-NEXT: v_ashrrev_i32_e64 v6, v6, v12
; GFX9-G-O0-NEXT: s_mov_b32 s9, 1
; GFX9-G-O0-NEXT: s_mov_b32 s8, 0
-; GFX9-G-O0-NEXT: v_and_b32_e64 v12, v8, s9
-; GFX9-G-O0-NEXT: v_and_b32_e64 v10, v8, s8
+; GFX9-G-O0-NEXT: v_and_b32_e64 v12, v10, s9
+; GFX9-G-O0-NEXT: v_and_b32_e64 v14, v10, s8
; GFX9-G-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v10
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, s5
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, s4
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v14
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v24, s5
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v23, s4
; GFX9-G-O0-NEXT: ; kill: def $vgpr12_vgpr13 killed $vgpr12_vgpr13 def $vgpr12_vgpr13_vgpr14_vgpr15 killed $exec
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v11
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v10
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v23, v25
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v24, v26
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v21, v27
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v22, v28
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v23
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v24
-; GFX9-G-O0-NEXT: v_and_b32_e64 v11, v8, v11
-; GFX9-G-O0-NEXT: v_and_b32_e64 v10, v8, v10
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v21
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v21, v22
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v23
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v24
+; GFX9-G-O0-NEXT: v_and_b32_e64 v11, v10, v11
+; GFX9-G-O0-NEXT: v_and_b32_e64 v10, v10, v22
; GFX9-G-O0-NEXT: v_and_b32_e64 v8, v6, v8
; GFX9-G-O0-NEXT: v_and_b32_e64 v6, v6, v21
; GFX9-G-O0-NEXT: v_sub_co_u32_e64 v4, s[8:9], v4, v11
@@ -2114,66 +2060,62 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[20:21], -1
; GFX9-G-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX9-G-O0-NEXT: s_mov_b64 exec, s[20:21]
-; GFX9-G-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:324 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:324 ; 4-byte Folded Reload
; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
; GFX9-G-O0-NEXT: s_mov_b32 s4, 64
-; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0)
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v16, v5
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v4
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v22, v7
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v21, v6
+; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3)
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v23, v18
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v24, v17
+; GFX9-G-O0-NEXT: ; kill: def $vgpr21 killed $vgpr21 def $vgpr21_vgpr22 killed $exec
+; GFX9-G-O0-NEXT: s_waitcnt vmcnt(1)
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v22, v4
; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, s4
-; GFX9-G-O0-NEXT: v_sub_u32_e64 v4, v13, v4
+; GFX9-G-O0-NEXT: v_sub_u32_e64 v4, v19, v4
; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, s4
-; GFX9-G-O0-NEXT: v_sub_u32_e64 v5, v5, v13
+; GFX9-G-O0-NEXT: v_sub_u32_e64 v5, v5, v19
; GFX9-G-O0-NEXT: s_mov_b32 s6, 0
; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, s4
-; GFX9-G-O0-NEXT: v_cmp_lt_u32_e64 s[4:5], v13, v6
+; GFX9-G-O0-NEXT: v_cmp_lt_u32_e64 s[4:5], v19, v6
; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, s6
-; GFX9-G-O0-NEXT: v_cmp_eq_u32_e64 s[6:7], v13, v6
-; GFX9-G-O0-NEXT: v_lshrrev_b64 v[6:7], v13, v[21:22]
-; GFX9-G-O0-NEXT: v_lshrrev_b64 v[26:27], v13, v[15:16]
+; GFX9-G-O0-NEXT: v_cmp_eq_u32_e64 s[6:7], v19, v6
+; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0)
+; GFX9-G-O0-NEXT: v_lshrrev_b64 v[6:7], v19, v[21:22]
+; GFX9-G-O0-NEXT: v_lshrrev_b64 v[26:27], v19, v[23:24]
; GFX9-G-O0-NEXT: v_lshlrev_b64 v[24:25], v5, v[21:22]
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v26
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v20, v26
; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v27
; GFX9-G-O0-NEXT: v_mov_b32_e32 v23, v24
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v25
-; GFX9-G-O0-NEXT: v_or_b32_e64 v14, v14, v23
-; GFX9-G-O0-NEXT: v_or_b32_e64 v13, v5, v13
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v19, v25
+; GFX9-G-O0-NEXT: v_or_b32_e64 v20, v20, v23
+; GFX9-G-O0-NEXT: v_or_b32_e64 v19, v5, v19
; GFX9-G-O0-NEXT: s_mov_b64 s[8:9], 0
; GFX9-G-O0-NEXT: v_lshrrev_b64 v[21:22], v4, v[21:22]
; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v21
; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v22
-; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v4, v4, v14, s[4:5]
-; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v5, v5, v13, s[4:5]
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v15
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v16
-; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v4, v4, v14, s[6:7]
-; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v13, v5, v13, s[6:7]
+; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v4, v4, v20, s[4:5]
+; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v5, v5, v19, s[4:5]
+; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v4, v4, v18, s[6:7]
+; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v17, v5, v17, s[6:7]
; GFX9-G-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v13
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v6
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v17
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v18, v6
; GFX9-G-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 killed $vgpr6_vgpr7 killed $exec
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, 0
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v17, 0
; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, 0
-; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v13, v13, v14, s[4:5]
+; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v17, v17, v18, s[4:5]
; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[4:5]
-; GFX9-G-O0-NEXT: ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14 killed $exec
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v6
+; GFX9-G-O0-NEXT: ; kill: def $vgpr17 killed $vgpr17 def $vgpr17_vgpr18 killed $exec
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v18, v6
; GFX9-G-O0-NEXT: ; kill: def $vgpr4_vgpr5 killed $vgpr4_vgpr5 def $vgpr4_vgpr5_vgpr6_vgpr7 killed $exec
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v13
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v14
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v16, v17
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v18
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v19
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v20
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v17
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v18
; GFX9-G-O0-NEXT: s_mov_b32 s4, -1
; GFX9-G-O0-NEXT: s_mov_b32 s10, -1
; GFX9-G-O0-NEXT: s_mov_b32 s7, -1
@@ -2226,14 +2168,14 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[20:21], -1
; GFX9-G-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX9-G-O0-NEXT: s_mov_b64 exec, s[20:21]
-; GFX9-G-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
; GFX9-G-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
; GFX9-G-O0-NEXT: s_mov_b64 s[4:5], 0
; GFX9-G-O0-NEXT: s_mov_b32 s6, 1
; GFX9-G-O0-NEXT: s_mov_b32 s10, 0
@@ -2241,48 +2183,50 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-G-O0-NEXT: s_mov_b32 s8, 0
; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, s6
; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3)
-; GFX9-G-O0-NEXT: v_add_co_u32_e64 v5, s[6:7], v2, v5
+; GFX9-G-O0-NEXT: v_add_co_u32_e64 v5, s[6:7], v3, v5
; GFX9-G-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:324 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, s10
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, s10
; GFX9-G-O0-NEXT: s_waitcnt vmcnt(1)
-; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v6, s[6:7], v4, v6, s[6:7]
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, s9
-; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v8, s[6:7], v3, v4, s[6:7]
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, s8
-; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v7, s[6:7], v1, v3, s[6:7]
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v5
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v6
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v8
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v16, v7
-; GFX9-G-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:308 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v6, s[6:7], v6, v8, s[6:7]
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, s9
+; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v8, s[6:7], v7, v8, s[6:7]
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, s8
+; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v7, s[6:7], v2, v7, s[6:7]
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v5
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v6
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v16, v8
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v17, v7
+; GFX9-G-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:308 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0)
-; GFX9-G-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:312 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:316 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:320 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:312 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:316 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:320 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: s_mov_b32 s6, 0x7f
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, s6
-; GFX9-G-O0-NEXT: v_sub_co_u32_e64 v4, s[6:7], v1, v2
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, s6
+; GFX9-G-O0-NEXT: v_sub_co_u32_e64 v9, s[6:7], v2, v3
; GFX9-G-O0-NEXT: s_mov_b32 s7, 64
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v10
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v9
+; GFX9-G-O0-NEXT: ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14 killed $exec
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v1
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v10
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v4
; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, s7
-; GFX9-G-O0-NEXT: v_sub_u32_e64 v3, v4, v1
+; GFX9-G-O0-NEXT: v_sub_u32_e64 v3, v9, v1
; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, s7
-; GFX9-G-O0-NEXT: v_sub_u32_e64 v9, v1, v4
+; GFX9-G-O0-NEXT: v_sub_u32_e64 v15, v1, v9
; GFX9-G-O0-NEXT: s_mov_b32 s6, 0
; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, s7
-; GFX9-G-O0-NEXT: v_cmp_lt_u32_e64 s[8:9], v4, v1
+; GFX9-G-O0-NEXT: v_cmp_lt_u32_e64 s[8:9], v9, v1
; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, s6
-; GFX9-G-O0-NEXT: v_cmp_eq_u32_e64 s[6:7], v4, v1
-; GFX9-G-O0-NEXT: v_lshlrev_b64 v[1:2], v4, v[13:14]
-; GFX9-G-O0-NEXT: v_lshrrev_b64 v[18:19], v9, v[13:14]
-; GFX9-G-O0-NEXT: v_lshlrev_b64 v[16:17], v4, v[11:12]
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v18
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v19
+; GFX9-G-O0-NEXT: v_cmp_eq_u32_e64 s[6:7], v9, v1
+; GFX9-G-O0-NEXT: v_lshlrev_b64 v[1:2], v9, v[13:14]
+; GFX9-G-O0-NEXT: v_lshrrev_b64 v[18:19], v15, v[13:14]
+; GFX9-G-O0-NEXT: v_lshlrev_b64 v[16:17], v9, v[11:12]
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v18
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v19
; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v16
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v17
-; GFX9-G-O0-NEXT: v_or_b32_e64 v10, v10, v15
-; GFX9-G-O0-NEXT: v_or_b32_e64 v4, v4, v9
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v17
+; GFX9-G-O0-NEXT: v_or_b32_e64 v12, v12, v15
+; GFX9-G-O0-NEXT: v_or_b32_e64 v11, v9, v11
; GFX9-G-O0-NEXT: v_lshlrev_b64 v[13:14], v3, v[13:14]
; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v1
; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v2
@@ -2294,10 +2238,8 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v3
; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v13
; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v14
-; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v9, v9, v10, s[8:9]
-; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[8:9]
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v11
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v12
+; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v9, v9, v12, s[8:9]
+; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v3, v3, v11, s[8:9]
; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v9, v9, v10, s[6:7]
; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[6:7]
; GFX9-G-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
>From 929566d2b08a9c64d768bfbce2c4fec21c34ab72 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= <schuett at gmail.com>
Date: Fri, 24 May 2024 13:09:28 +0200
Subject: [PATCH 4/6] add observer
---
llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp | 5 +++--
.../CodeGen/AArch64/GlobalISel/combine-freeze.mir | 12 ++++++------
2 files changed, 9 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index abecee4259030..23b8403657b26 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -265,8 +265,9 @@ bool CombinerHelper::matchFreezeOfSingleMaybePoisonOperand(
}
}
- // FIXME: observer must be aware of dropping
- // cast<GenericMachineInstr>(OrigDef)->dropPoisonGeneratingFlags();
+ Observer.changingInstr(*OrigDef);
+ cast<GenericMachineInstr>(OrigDef)->dropPoisonGeneratingFlags();
+ Observer.changedInstr(*OrigDef);
// Eliminate freeze if all operands are guaranteed non-poison.
if (!MaybePoisonOperand) {
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-freeze.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-freeze.mir
index dfa45d96fe94c..0d41be09747c9 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-freeze.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-freeze.mir
@@ -189,8 +189,8 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %cst:_(s32) = G_CONSTANT i32 9
; CHECK-NEXT: %c:_(s32) = G_CONSTANT_FOLD_BARRIER %cst
- ; CHECK-NEXT: %3:_(s64) = nneg G_ZEXT %c(s32)
- ; CHECK-NEXT: $x0 = COPY %3(s64)
+ ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT %c(s32)
+ ; CHECK-NEXT: $x0 = COPY [[ZEXT]](s64)
; CHECK-NEXT: RET_ReallyLR implicit $x0
%0:_(s64) = COPY $x0
%cst:_(s32) = G_CONSTANT i32 9
@@ -255,7 +255,7 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
- ; CHECK-NEXT: [[UDIV:%[0-9]+]]:_(s64) = exact G_UDIV %c, %c
+ ; CHECK-NEXT: [[UDIV:%[0-9]+]]:_(s64) = G_UDIV %c, %c
; CHECK-NEXT: $x0 = COPY [[UDIV]](s64)
; CHECK-NEXT: RET_ReallyLR implicit $x0
%0:_(s64) = COPY $x0
@@ -299,7 +299,7 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
- ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = nsw G_MUL %c, %c
+ ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL %c, %c
; CHECK-NEXT: $x0 = COPY [[MUL]](s64)
; CHECK-NEXT: RET_ReallyLR implicit $x0
%0:_(s64) = COPY $x0
@@ -343,7 +343,7 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = nuw G_TRUNC %c(s64)
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %c(s64)
; CHECK-NEXT: $w0 = COPY [[TRUNC]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(s64) = COPY $x0
@@ -387,7 +387,7 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
- ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = nuw G_ADD %c, %c
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD %c, %c
; CHECK-NEXT: $x0 = COPY [[ADD]](s64)
; CHECK-NEXT: RET_ReallyLR implicit $x0
%0:_(s64) = COPY $x0
>From 62a99c80e27ca964952c05e3800fac4fa0d16193 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= <schuett at gmail.com>
Date: Fri, 24 May 2024 13:47:26 +0200
Subject: [PATCH 5/6] remove freeze combine
---
.../llvm/CodeGen/GlobalISel/CombinerHelper.h | 4 --
.../include/llvm/Target/GlobalISel/Combine.td | 8 ---
.../GlobalISel/CombinerHelperVectorOps.cpp | 53 -------------------
3 files changed, 65 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 2111e82e1a99d..2ddf20ebe7af7 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -840,10 +840,6 @@ class CombinerHelper {
/// Combine extract vector element.
bool matchExtractVectorElement(MachineInstr &MI, BuildFnTy &MatchInfo);
- /// Combine extract vector element with freeze on the vector register.
- bool matchExtractVectorElementWithFreeze(const MachineOperand &MO,
- BuildFnTy &MatchInfo);
-
/// Combine extract vector element with a build vector on the vector register.
bool matchExtractVectorElementWithBuildVector(const MachineOperand &MO,
BuildFnTy &MatchInfo);
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 47f73daf20891..36001162b3dc8 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1532,13 +1532,6 @@ def extract_vector_element_build_vector_trunc8 : GICombineRule<
[{ return Helper.matchExtractVectorElementWithBuildVectorTrunc(${root}, ${matchinfo}); }]),
(apply [{ Helper.applyBuildFnMO(${root}, ${matchinfo}); }])>;
-def extract_vector_element_freeze : GICombineRule<
- (defs root:$root, build_fn_matchinfo:$matchinfo),
- (match (G_FREEZE $src, $input),
- (G_EXTRACT_VECTOR_ELT $root, $src, $idx),
- [{ return Helper.matchExtractVectorElementWithFreeze(${root}, ${matchinfo}); }]),
- (apply [{ Helper.applyBuildFnMO(${root}, ${matchinfo}); }])>;
-
def sext_trunc : GICombineRule<
(defs root:$root, build_fn_matchinfo:$matchinfo),
(match (G_TRUNC $src, $x, (MIFlags NoSWrap)),
@@ -1636,7 +1629,6 @@ extract_vector_element_build_vector_trunc5,
extract_vector_element_build_vector_trunc6,
extract_vector_element_build_vector_trunc7,
extract_vector_element_build_vector_trunc8,
-//extract_vector_element_freeze,
extract_vector_element_shuffle_vector,
insert_vector_element_extract_vector_element
]>;
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelperVectorOps.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelperVectorOps.cpp
index 21b1eb2628174..b4765fb280f9d 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelperVectorOps.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelperVectorOps.cpp
@@ -144,59 +144,6 @@ bool CombinerHelper::matchExtractVectorElementWithDifferentIndices(
return false;
}
-bool CombinerHelper::matchExtractVectorElementWithFreeze(
- const MachineOperand &MO, BuildFnTy &MatchInfo) {
- MachineInstr *Root = getDefIgnoringCopies(MO.getReg(), MRI);
- GExtractVectorElement *Extract = cast<GExtractVectorElement>(Root);
-
- Register Vector = Extract->getVectorReg();
-
- //
- // %bv:_(<2 x s32>) = G_BUILD_VECTOR %arg1(s32), %arg2(s32)
- // %freeze:_(<2 x s32>) = G_FREEZE %bv(<2 x s32>)
- // %extract:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<2 x s32>), %opaque(s64)
- //
- // -->
- //
- // %bv:_(<2 x s32>) = G_BUILD_VECTOR %arg1(s32), %arg2(s32)
- // %extract:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<2 x s32>), %opaque(s64)
- // %freeze:_(s32) = G_FREEZE %extract(s32)
- //
- //
-
- // For G_FREEZE, the input and the output types are identical. Moving the
- // freeze from the Vector into the front of the extract preserves the freeze
- // semantics. The result is still freeze'd. Furthermore, the Vector register
- // becomes easier to analyze. A build vector could have been hidden behind the
- // freeze.
-
- // We expect a freeze on the Vector register.
- GFreeze *Freeze = getOpcodeDef<GFreeze>(Vector, MRI);
- if (!Freeze)
- return false;
-
- Register Dst = Extract->getReg(0);
- LLT DstTy = MRI.getType(Dst);
-
- // We first have to check for one-use and legality of the freeze.
- // The type of the extractVectorElement did not change.
- if (!MRI.hasOneNonDBGUse(Freeze->getReg(0)) ||
- !isLegalOrBeforeLegalizer({TargetOpcode::G_FREEZE, {DstTy}}))
- return false;
-
- Register Index = Extract->getIndexReg();
-
- // We move the freeze from the Vector register in front of the
- // extractVectorElement.
- MatchInfo = [=](MachineIRBuilder &B) {
- auto Extract =
- B.buildExtractVectorElement(DstTy, Freeze->getSourceReg(), Index);
- B.buildFreeze(Dst, Extract);
- };
-
- return true;
-}
-
bool CombinerHelper::matchExtractVectorElementWithBuildVector(
const MachineOperand &MO, BuildFnTy &MatchInfo) {
MachineInstr *Root = getDefIgnoringCopies(MO.getReg(), MRI);
>From 017df6bd4f47b369992acecfa111fa9900c5104c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= <schuett at gmail.com>
Date: Sat, 25 May 2024 17:40:48 +0200
Subject: [PATCH 6/6] address review comments
---
.../include/llvm/Target/GlobalISel/Combine.td | 4 +-
.../AArch64/GlobalISel/combine-freeze.mir | 7 +-
llvm/test/CodeGen/AArch64/fast-isel-select.ll | 222 ++++++++++++++----
3 files changed, 183 insertions(+), 50 deletions(-)
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 36001162b3dc8..383589add7755 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1705,14 +1705,14 @@ def integer_reassoc_combines: GICombineGroup<[
APlusBMinusCPlusA
]>;
-def freeze_of_not_undef_and_poison : GICombineRule<
+def freeze_of_non_undef_non_poison : GICombineRule<
(defs root:$root),
(match (G_FREEZE $root, $src),
[{ return isGuaranteedNotToBeUndefOrPoison(${src}.getReg(), MRI); }]),
(apply (GIReplaceReg $root, $src))>;
def freeze_combines: GICombineGroup<[
- freeze_of_not_undef_and_poison,
+ freeze_of_non_undef_non_poison,
push_freeze_to_prevent_poison_from_propagating
]>;
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-freeze.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-freeze.mir
index 0d41be09747c9..2450f109c406a 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-freeze.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-freeze.mir
@@ -16,8 +16,8 @@ body: |
; CHECK-NEXT: $x0 = COPY [[FREEZE]](s64)
; CHECK-NEXT: RET_ReallyLR implicit $x0
%0:_(s64) = COPY $x0
- %2:_(s64) = G_FREEZE %0
- $x0 = COPY %2(s64)
+ %1:_(s64) = G_FREEZE %0
+ $x0 = COPY %1(s64)
RET_ReallyLR implicit $x0
...
@@ -33,7 +33,6 @@ body: |
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 9
; CHECK-NEXT: $x0 = COPY [[C]](s64)
; CHECK-NEXT: RET_ReallyLR implicit $x0
- %0:_(s64) = COPY $x0
%1:_(s64) = G_CONSTANT i64 9
%2:_(s64) = G_FREEZE %1
$x0 = COPY %2(s64)
@@ -52,7 +51,6 @@ body: |
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 9.000000e+00
; CHECK-NEXT: $x0 = COPY [[C]](s64)
; CHECK-NEXT: RET_ReallyLR implicit $x0
- %0:_(s64) = COPY $x0
%1:_(s64) = G_FCONSTANT double 9.0
%2:_(s64) = G_FREEZE %1
$x0 = COPY %2(s64)
@@ -71,7 +69,6 @@ body: |
; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[DEF]]
; CHECK-NEXT: $x0 = COPY [[FREEZE]](s64)
; CHECK-NEXT: RET_ReallyLR implicit $x0
- %0:_(s64) = COPY $x0
%1:_(s64) = G_IMPLICIT_DEF
%2:_(s64) = G_FREEZE %1
$x0 = COPY %2(s64)
diff --git a/llvm/test/CodeGen/AArch64/fast-isel-select.ll b/llvm/test/CodeGen/AArch64/fast-isel-select.ll
index 9184066fc8107..65701343ccc1e 100644
--- a/llvm/test/CodeGen/AArch64/fast-isel-select.ll
+++ b/llvm/test/CodeGen/AArch64/fast-isel-select.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=aarch64-apple-darwin -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-apple-darwin -global-isel -verify-machineinstrs < %s | FileCheck %s --check-prefix=GISEL
+; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-FASTISEL
+; RUN: llc -mtriple=aarch64-apple-darwin -global-isel -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GISEL
; First test the different supported value types for select.
define zeroext i1 @select_i1(i1 zeroext %c, i1 zeroext %a, i1 zeroext %b) {
@@ -10,6 +10,18 @@ define zeroext i1 @select_i1(i1 zeroext %c, i1 zeroext %a, i1 zeroext %b) {
; GISEL-NEXT: tst w0, #0x1
; GISEL-NEXT: csel w0, w1, w2, ne
; GISEL-NEXT: ret
+; CHECK-FASTISEL-LABEL: select_i1:
+; CHECK-FASTISEL: ; %bb.0:
+; CHECK-FASTISEL-NEXT: tst w0, #0x1
+; CHECK-FASTISEL-NEXT: csel w8, w1, w2, ne
+; CHECK-FASTISEL-NEXT: and w0, w8, #0x1
+; CHECK-FASTISEL-NEXT: ret
+;
+; CHECK-GISEL-LABEL: select_i1:
+; CHECK-GISEL: ; %bb.0:
+; CHECK-GISEL-NEXT: tst w0, #0x1
+; CHECK-GISEL-NEXT: csel w0, w1, w2, ne
+; CHECK-GISEL-NEXT: ret
%1 = select i1 %c, i1 %a, i1 %b
ret i1 %1
}
@@ -20,6 +32,18 @@ define zeroext i8 @select_i8(i1 zeroext %c, i8 zeroext %a, i8 zeroext %b) {
; GISEL-NEXT: tst w0, #0x1
; GISEL-NEXT: csel w0, w1, w2, ne
; GISEL-NEXT: ret
+; CHECK-FASTISEL-LABEL: select_i8:
+; CHECK-FASTISEL: ; %bb.0:
+; CHECK-FASTISEL-NEXT: tst w0, #0x1
+; CHECK-FASTISEL-NEXT: csel w8, w1, w2, ne
+; CHECK-FASTISEL-NEXT: uxtb w0, w8
+; CHECK-FASTISEL-NEXT: ret
+;
+; CHECK-GISEL-LABEL: select_i8:
+; CHECK-GISEL: ; %bb.0:
+; CHECK-GISEL-NEXT: tst w0, #0x1
+; CHECK-GISEL-NEXT: csel w0, w1, w2, ne
+; CHECK-GISEL-NEXT: ret
%1 = select i1 %c, i8 %a, i8 %b
ret i8 %1
}
@@ -30,6 +54,18 @@ define zeroext i16 @select_i16(i1 zeroext %c, i16 zeroext %a, i16 zeroext %b) {
; GISEL-NEXT: tst w0, #0x1
; GISEL-NEXT: csel w0, w1, w2, ne
; GISEL-NEXT: ret
+; CHECK-FASTISEL-LABEL: select_i16:
+; CHECK-FASTISEL: ; %bb.0:
+; CHECK-FASTISEL-NEXT: tst w0, #0x1
+; CHECK-FASTISEL-NEXT: csel w8, w1, w2, ne
+; CHECK-FASTISEL-NEXT: uxth w0, w8
+; CHECK-FASTISEL-NEXT: ret
+;
+; CHECK-GISEL-LABEL: select_i16:
+; CHECK-GISEL: ; %bb.0:
+; CHECK-GISEL-NEXT: tst w0, #0x1
+; CHECK-GISEL-NEXT: csel w0, w1, w2, ne
+; CHECK-GISEL-NEXT: ret
%1 = select i1 %c, i16 %a, i16 %b
ret i16 %1
}
@@ -40,6 +76,17 @@ define i32 @select_i32(i1 zeroext %c, i32 %a, i32 %b) {
; GISEL-NEXT: tst w0, #0x1
; GISEL-NEXT: csel w0, w1, w2, ne
; GISEL-NEXT: ret
+; CHECK-FASTISEL-LABEL: select_i32:
+; CHECK-FASTISEL: ; %bb.0:
+; CHECK-FASTISEL-NEXT: tst w0, #0x1
+; CHECK-FASTISEL-NEXT: csel w0, w1, w2, ne
+; CHECK-FASTISEL-NEXT: ret
+;
+; CHECK-GISEL-LABEL: select_i32:
+; CHECK-GISEL: ; %bb.0:
+; CHECK-GISEL-NEXT: tst w0, #0x1
+; CHECK-GISEL-NEXT: csel w0, w1, w2, ne
+; CHECK-GISEL-NEXT: ret
%1 = select i1 %c, i32 %a, i32 %b
ret i32 %1
}
@@ -50,6 +97,17 @@ define i64 @select_i64(i1 zeroext %c, i64 %a, i64 %b) {
; GISEL-NEXT: tst w0, #0x1
; GISEL-NEXT: csel x0, x1, x2, ne
; GISEL-NEXT: ret
+; CHECK-FASTISEL-LABEL: select_i64:
+; CHECK-FASTISEL: ; %bb.0:
+; CHECK-FASTISEL-NEXT: tst w0, #0x1
+; CHECK-FASTISEL-NEXT: csel x0, x1, x2, ne
+; CHECK-FASTISEL-NEXT: ret
+;
+; CHECK-GISEL-LABEL: select_i64:
+; CHECK-GISEL: ; %bb.0:
+; CHECK-GISEL-NEXT: tst w0, #0x1
+; CHECK-GISEL-NEXT: csel x0, x1, x2, ne
+; CHECK-GISEL-NEXT: ret
%1 = select i1 %c, i64 %a, i64 %b
ret i64 %1
}
@@ -60,6 +118,17 @@ define float @select_f32(i1 zeroext %c, float %a, float %b) {
; GISEL-NEXT: tst w0, #0x1
; GISEL-NEXT: fcsel s0, s0, s1, ne
; GISEL-NEXT: ret
+; CHECK-FASTISEL-LABEL: select_f32:
+; CHECK-FASTISEL: ; %bb.0:
+; CHECK-FASTISEL-NEXT: tst w0, #0x1
+; CHECK-FASTISEL-NEXT: fcsel s0, s0, s1, ne
+; CHECK-FASTISEL-NEXT: ret
+;
+; CHECK-GISEL-LABEL: select_f32:
+; CHECK-GISEL: ; %bb.0:
+; CHECK-GISEL-NEXT: tst w0, #0x1
+; CHECK-GISEL-NEXT: fcsel s0, s0, s1, ne
+; CHECK-GISEL-NEXT: ret
%1 = select i1 %c, float %a, float %b
ret float %1
}
@@ -70,17 +139,33 @@ define double @select_f64(i1 zeroext %c, double %a, double %b) {
; GISEL-NEXT: tst w0, #0x1
; GISEL-NEXT: fcsel d0, d0, d1, ne
; GISEL-NEXT: ret
+; CHECK-FASTISEL-LABEL: select_f64:
+; CHECK-FASTISEL: ; %bb.0:
+; CHECK-FASTISEL-NEXT: tst w0, #0x1
+; CHECK-FASTISEL-NEXT: fcsel d0, d0, d1, ne
+; CHECK-FASTISEL-NEXT: ret
+;
+; CHECK-GISEL-LABEL: select_f64:
+; CHECK-GISEL: ; %bb.0:
+; CHECK-GISEL-NEXT: tst w0, #0x1
+; CHECK-GISEL-NEXT: fcsel d0, d0, d1, ne
+; CHECK-GISEL-NEXT: ret
%1 = select i1 %c, double %a, double %b
ret double %1
}
; Now test the folding of all compares.
define float @select_fcmp_false(float %x, float %a, float %b) {
-; CHECK-LABEL: select_fcmp_false:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: fmov s0, s2
-; CHECK-NEXT: ret
+; CHECK-FASTISEL-LABEL: select_fcmp_false:
+; CHECK-FASTISEL: ; %bb.0:
+; CHECK-FASTISEL-NEXT: fmov s0, s2
+; CHECK-FASTISEL-NEXT: ret
;
+; CHECK-GISEL-LABEL: select_fcmp_false:
+; CHECK-GISEL: ; %bb.0:
+; CHECK-GISEL-NEXT: fcmp s0, s0
+; CHECK-GISEL-NEXT: fcsel s0, s1, s2, gt
+; CHECK-GISEL-NEXT: ret
; GISEL-LABEL: select_fcmp_false:
; GISEL: ; %bb.0:
; GISEL-NEXT: fcmp s0, s0
@@ -97,7 +182,6 @@ define float @select_fcmp_ogt(float %x, float %y, float %a, float %b) {
; CHECK-NEXT: fcmp s0, s1
; CHECK-NEXT: fcsel s0, s2, s3, gt
; CHECK-NEXT: ret
-;
; GISEL-LABEL: select_fcmp_ogt:
; GISEL: ; %bb.0:
; GISEL-NEXT: fcmp s0, s1
@@ -114,7 +198,6 @@ define float @select_fcmp_oge(float %x, float %y, float %a, float %b) {
; CHECK-NEXT: fcmp s0, s1
; CHECK-NEXT: fcsel s0, s2, s3, ge
; CHECK-NEXT: ret
-;
; GISEL-LABEL: select_fcmp_oge:
; GISEL: ; %bb.0:
; GISEL-NEXT: fcmp s0, s1
@@ -131,7 +214,6 @@ define float @select_fcmp_olt(float %x, float %y, float %a, float %b) {
; CHECK-NEXT: fcmp s0, s1
; CHECK-NEXT: fcsel s0, s2, s3, mi
; CHECK-NEXT: ret
-;
; GISEL-LABEL: select_fcmp_olt:
; GISEL: ; %bb.0:
; GISEL-NEXT: fcmp s0, s1
@@ -148,7 +230,6 @@ define float @select_fcmp_ole(float %x, float %y, float %a, float %b) {
; CHECK-NEXT: fcmp s0, s1
; CHECK-NEXT: fcsel s0, s2, s3, ls
; CHECK-NEXT: ret
-;
; GISEL-LABEL: select_fcmp_ole:
; GISEL: ; %bb.0:
; GISEL-NEXT: fcmp s0, s1
@@ -160,13 +241,22 @@ define float @select_fcmp_ole(float %x, float %y, float %a, float %b) {
}
define float @select_fcmp_one(float %x, float %y, float %a, float %b) {
-; CHECK-LABEL: select_fcmp_one:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: fcmp s0, s1
-; CHECK-NEXT: fcsel s0, s2, s3, mi
-; CHECK-NEXT: fcsel s0, s2, s0, gt
-; CHECK-NEXT: ret
+; CHECK-FASTISEL-LABEL: select_fcmp_one:
+; CHECK-FASTISEL: ; %bb.0:
+; CHECK-FASTISEL-NEXT: fcmp s0, s1
+; CHECK-FASTISEL-NEXT: fcsel s0, s2, s3, mi
+; CHECK-FASTISEL-NEXT: fcsel s0, s2, s0, gt
+; CHECK-FASTISEL-NEXT: ret
;
+; CHECK-GISEL-LABEL: select_fcmp_one:
+; CHECK-GISEL: ; %bb.0:
+; CHECK-GISEL-NEXT: fcmp s0, s1
+; CHECK-GISEL-NEXT: cset w8, mi
+; CHECK-GISEL-NEXT: cset w9, gt
+; CHECK-GISEL-NEXT: orr w8, w8, w9
+; CHECK-GISEL-NEXT: tst w8, #0x1
+; CHECK-GISEL-NEXT: fcsel s0, s2, s3, ne
+; CHECK-GISEL-NEXT: ret
; GISEL-LABEL: select_fcmp_one:
; GISEL: ; %bb.0:
; GISEL-NEXT: fcmp s0, s1
@@ -187,7 +277,6 @@ define float @select_fcmp_ord(float %x, float %y, float %a, float %b) {
; CHECK-NEXT: fcmp s0, s1
; CHECK-NEXT: fcsel s0, s2, s3, vc
; CHECK-NEXT: ret
-;
; GISEL-LABEL: select_fcmp_ord:
; GISEL: ; %bb.0:
; GISEL-NEXT: fcmp s0, s1
@@ -204,7 +293,6 @@ define float @select_fcmp_uno(float %x, float %y, float %a, float %b) {
; CHECK-NEXT: fcmp s0, s1
; CHECK-NEXT: fcsel s0, s2, s3, vs
; CHECK-NEXT: ret
-;
; GISEL-LABEL: select_fcmp_uno:
; GISEL: ; %bb.0:
; GISEL-NEXT: fcmp s0, s1
@@ -216,13 +304,22 @@ define float @select_fcmp_uno(float %x, float %y, float %a, float %b) {
}
define float @select_fcmp_ueq(float %x, float %y, float %a, float %b) {
-; CHECK-LABEL: select_fcmp_ueq:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: fcmp s0, s1
-; CHECK-NEXT: fcsel s0, s2, s3, eq
-; CHECK-NEXT: fcsel s0, s2, s0, vs
-; CHECK-NEXT: ret
+; CHECK-FASTISEL-LABEL: select_fcmp_ueq:
+; CHECK-FASTISEL: ; %bb.0:
+; CHECK-FASTISEL-NEXT: fcmp s0, s1
+; CHECK-FASTISEL-NEXT: fcsel s0, s2, s3, eq
+; CHECK-FASTISEL-NEXT: fcsel s0, s2, s0, vs
+; CHECK-FASTISEL-NEXT: ret
;
+; CHECK-GISEL-LABEL: select_fcmp_ueq:
+; CHECK-GISEL: ; %bb.0:
+; CHECK-GISEL-NEXT: fcmp s0, s1
+; CHECK-GISEL-NEXT: cset w8, eq
+; CHECK-GISEL-NEXT: cset w9, vs
+; CHECK-GISEL-NEXT: orr w8, w8, w9
+; CHECK-GISEL-NEXT: tst w8, #0x1
+; CHECK-GISEL-NEXT: fcsel s0, s2, s3, ne
+; CHECK-GISEL-NEXT: ret
; GISEL-LABEL: select_fcmp_ueq:
; GISEL: ; %bb.0:
; GISEL-NEXT: fcmp s0, s1
@@ -243,7 +340,6 @@ define float @select_fcmp_ugt(float %x, float %y, float %a, float %b) {
; CHECK-NEXT: fcmp s0, s1
; CHECK-NEXT: fcsel s0, s2, s3, hi
; CHECK-NEXT: ret
-;
; GISEL-LABEL: select_fcmp_ugt:
; GISEL: ; %bb.0:
; GISEL-NEXT: fcmp s0, s1
@@ -260,7 +356,6 @@ define float @select_fcmp_uge(float %x, float %y, float %a, float %b) {
; CHECK-NEXT: fcmp s0, s1
; CHECK-NEXT: fcsel s0, s2, s3, pl
; CHECK-NEXT: ret
-;
; GISEL-LABEL: select_fcmp_uge:
; GISEL: ; %bb.0:
; GISEL-NEXT: fcmp s0, s1
@@ -277,7 +372,6 @@ define float @select_fcmp_ult(float %x, float %y, float %a, float %b) {
; CHECK-NEXT: fcmp s0, s1
; CHECK-NEXT: fcsel s0, s2, s3, lt
; CHECK-NEXT: ret
-;
; GISEL-LABEL: select_fcmp_ult:
; GISEL: ; %bb.0:
; GISEL-NEXT: fcmp s0, s1
@@ -295,7 +389,6 @@ define float @select_fcmp_ule(float %x, float %y, float %a, float %b) {
; CHECK-NEXT: fcmp s0, s1
; CHECK-NEXT: fcsel s0, s2, s3, le
; CHECK-NEXT: ret
-;
; GISEL-LABEL: select_fcmp_ule:
; GISEL: ; %bb.0:
; GISEL-NEXT: fcmp s0, s1
@@ -312,7 +405,6 @@ define float @select_fcmp_une(float %x, float %y, float %a, float %b) {
; CHECK-NEXT: fcmp s0, s1
; CHECK-NEXT: fcsel s0, s2, s3, ne
; CHECK-NEXT: ret
-;
; GISEL-LABEL: select_fcmp_une:
; GISEL: ; %bb.0:
; GISEL-NEXT: fcmp s0, s1
@@ -324,11 +416,20 @@ define float @select_fcmp_une(float %x, float %y, float %a, float %b) {
}
define float @select_fcmp_true(float %x, float %a, float %b) {
-; CHECK-LABEL: select_fcmp_true:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: fmov s0, s1
-; CHECK-NEXT: ret
+; CHECK-FASTISEL-LABEL: select_fcmp_true:
+; CHECK-FASTISEL: ; %bb.0:
+; CHECK-FASTISEL-NEXT: fmov s0, s1
+; CHECK-FASTISEL-NEXT: ret
;
+; CHECK-GISEL-LABEL: select_fcmp_true:
+; CHECK-GISEL: ; %bb.0:
+; CHECK-GISEL-NEXT: fcmp s0, s0
+; CHECK-GISEL-NEXT: cset w8, eq
+; CHECK-GISEL-NEXT: cset w9, vs
+; CHECK-GISEL-NEXT: orr w8, w8, w9
+; CHECK-GISEL-NEXT: tst w8, #0x1
+; CHECK-GISEL-NEXT: fcsel s0, s1, s2, ne
+; CHECK-GISEL-NEXT: ret
; GISEL-LABEL: select_fcmp_true:
; GISEL: ; %bb.0:
; GISEL-NEXT: fcmp s0, s0
@@ -349,7 +450,6 @@ define float @select_icmp_eq(i32 %x, i32 %y, float %a, float %b) {
; CHECK-NEXT: cmp w0, w1
; CHECK-NEXT: fcsel s0, s0, s1, eq
; CHECK-NEXT: ret
-;
; GISEL-LABEL: select_icmp_eq:
; GISEL: ; %bb.0:
; GISEL-NEXT: cmp w0, w1
@@ -366,7 +466,6 @@ define float @select_icmp_ne(i32 %x, i32 %y, float %a, float %b) {
; CHECK-NEXT: cmp w0, w1
; CHECK-NEXT: fcsel s0, s0, s1, ne
; CHECK-NEXT: ret
-;
; GISEL-LABEL: select_icmp_ne:
; GISEL: ; %bb.0:
; GISEL-NEXT: cmp w0, w1
@@ -383,7 +482,6 @@ define float @select_icmp_ugt(i32 %x, i32 %y, float %a, float %b) {
; CHECK-NEXT: cmp w0, w1
; CHECK-NEXT: fcsel s0, s0, s1, hi
; CHECK-NEXT: ret
-;
; GISEL-LABEL: select_icmp_ugt:
; GISEL: ; %bb.0:
; GISEL-NEXT: cmp w0, w1
@@ -400,7 +498,6 @@ define float @select_icmp_uge(i32 %x, i32 %y, float %a, float %b) {
; CHECK-NEXT: cmp w0, w1
; CHECK-NEXT: fcsel s0, s0, s1, hs
; CHECK-NEXT: ret
-;
; GISEL-LABEL: select_icmp_uge:
; GISEL: ; %bb.0:
; GISEL-NEXT: cmp w0, w1
@@ -417,7 +514,6 @@ define float @select_icmp_ult(i32 %x, i32 %y, float %a, float %b) {
; CHECK-NEXT: cmp w0, w1
; CHECK-NEXT: fcsel s0, s0, s1, lo
; CHECK-NEXT: ret
-;
; GISEL-LABEL: select_icmp_ult:
; GISEL: ; %bb.0:
; GISEL-NEXT: cmp w0, w1
@@ -434,7 +530,6 @@ define float @select_icmp_ule(i32 %x, i32 %y, float %a, float %b) {
; CHECK-NEXT: cmp w0, w1
; CHECK-NEXT: fcsel s0, s0, s1, ls
; CHECK-NEXT: ret
-;
; GISEL-LABEL: select_icmp_ule:
; GISEL: ; %bb.0:
; GISEL-NEXT: cmp w0, w1
@@ -451,7 +546,6 @@ define float @select_icmp_sgt(i32 %x, i32 %y, float %a, float %b) {
; CHECK-NEXT: cmp w0, w1
; CHECK-NEXT: fcsel s0, s0, s1, gt
; CHECK-NEXT: ret
-;
; GISEL-LABEL: select_icmp_sgt:
; GISEL: ; %bb.0:
; GISEL-NEXT: cmp w0, w1
@@ -468,7 +562,6 @@ define float @select_icmp_sge(i32 %x, i32 %y, float %a, float %b) {
; CHECK-NEXT: cmp w0, w1
; CHECK-NEXT: fcsel s0, s0, s1, ge
; CHECK-NEXT: ret
-;
; GISEL-LABEL: select_icmp_sge:
; GISEL: ; %bb.0:
; GISEL-NEXT: cmp w0, w1
@@ -485,7 +578,6 @@ define float @select_icmp_slt(i32 %x, i32 %y, float %a, float %b) {
; CHECK-NEXT: cmp w0, w1
; CHECK-NEXT: fcsel s0, s0, s1, lt
; CHECK-NEXT: ret
-;
; GISEL-LABEL: select_icmp_slt:
; GISEL: ; %bb.0:
; GISEL-NEXT: cmp w0, w1
@@ -502,7 +594,6 @@ define float @select_icmp_sle(i32 %x, i32 %y, float %a, float %b) {
; CHECK-NEXT: cmp w0, w1
; CHECK-NEXT: fcsel s0, s0, s1, le
; CHECK-NEXT: ret
-;
; GISEL-LABEL: select_icmp_sle:
; GISEL: ; %bb.0:
; GISEL-NEXT: cmp w0, w1
@@ -520,6 +611,17 @@ define zeroext i1 @select_opt1(i1 zeroext %c, i1 zeroext %a) {
; GISEL-NEXT: orr w8, w0, w1
; GISEL-NEXT: and w0, w8, #0x1
; GISEL-NEXT: ret
+; CHECK-FASTISEL-LABEL: select_opt1:
+; CHECK-FASTISEL: ; %bb.0:
+; CHECK-FASTISEL-NEXT: orr w8, w0, w1
+; CHECK-FASTISEL-NEXT: and w0, w8, #0x1
+; CHECK-FASTISEL-NEXT: ret
+;
+; CHECK-GISEL-LABEL: select_opt1:
+; CHECK-GISEL: ; %bb.0:
+; CHECK-GISEL-NEXT: orr w8, w0, w1
+; CHECK-GISEL-NEXT: and w0, w8, #0x1
+; CHECK-GISEL-NEXT: ret
%1 = select i1 %c, i1 true, i1 %a
ret i1 %1
}
@@ -531,6 +633,19 @@ define zeroext i1 @select_opt2(i1 zeroext %c, i1 zeroext %a) {
; GISEL-NEXT: orr w8, w8, w1
; GISEL-NEXT: and w0, w8, #0x1
; GISEL-NEXT: ret
+; CHECK-FASTISEL-LABEL: select_opt2:
+; CHECK-FASTISEL: ; %bb.0:
+; CHECK-FASTISEL-NEXT: eor w8, w0, #0x1
+; CHECK-FASTISEL-NEXT: orr w8, w8, w1
+; CHECK-FASTISEL-NEXT: and w0, w8, #0x1
+; CHECK-FASTISEL-NEXT: ret
+;
+; CHECK-GISEL-LABEL: select_opt2:
+; CHECK-GISEL: ; %bb.0:
+; CHECK-GISEL-NEXT: eor w8, w0, #0x1
+; CHECK-GISEL-NEXT: orr w8, w8, w1
+; CHECK-GISEL-NEXT: and w0, w8, #0x1
+; CHECK-GISEL-NEXT: ret
%1 = select i1 %c, i1 %a, i1 true
ret i1 %1
}
@@ -541,6 +656,17 @@ define zeroext i1 @select_opt3(i1 zeroext %c, i1 zeroext %a) {
; GISEL-NEXT: eor w8, w0, #0x1
; GISEL-NEXT: and w0, w8, w1
; GISEL-NEXT: ret
+; CHECK-FASTISEL-LABEL: select_opt3:
+; CHECK-FASTISEL: ; %bb.0:
+; CHECK-FASTISEL-NEXT: bic w8, w1, w0
+; CHECK-FASTISEL-NEXT: and w0, w8, #0x1
+; CHECK-FASTISEL-NEXT: ret
+;
+; CHECK-GISEL-LABEL: select_opt3:
+; CHECK-GISEL: ; %bb.0:
+; CHECK-GISEL-NEXT: eor w8, w0, #0x1
+; CHECK-GISEL-NEXT: and w0, w8, w1
+; CHECK-GISEL-NEXT: ret
%1 = select i1 %c, i1 false, i1 %a
ret i1 %1
}
@@ -550,6 +676,16 @@ define zeroext i1 @select_opt4(i1 zeroext %c, i1 zeroext %a) {
; GISEL: ; %bb.0:
; GISEL-NEXT: and w0, w0, w1
; GISEL-NEXT: ret
+; CHECK-FASTISEL-LABEL: select_opt4:
+; CHECK-FASTISEL: ; %bb.0:
+; CHECK-FASTISEL-NEXT: and w8, w0, w1
+; CHECK-FASTISEL-NEXT: and w0, w8, #0x1
+; CHECK-FASTISEL-NEXT: ret
+;
+; CHECK-GISEL-LABEL: select_opt4:
+; CHECK-GISEL: ; %bb.0:
+; CHECK-GISEL-NEXT: and w0, w0, w1
+; CHECK-GISEL-NEXT: ret
%1 = select i1 %c, i1 %a, i1 false
ret i1 %1
}
More information about the llvm-commits mailing list