[llvm] 4b91949 - [GlobalIsel] Combine selects with constants (#76089)
via llvm-commits
llvm-commits@lists.llvm.org
Tue Jan 2 08:26:43 PST 2024
Author: Thorsten Schütt
Date: 2024-01-02T17:26:39+01:00
New Revision: 4b9194952d73c34d4d58a5dc3aeddead130b5f0e
URL: https://github.com/llvm/llvm-project/commit/4b9194952d73c34d4d58a5dc3aeddead130b5f0e
DIFF: https://github.com/llvm/llvm-project/commit/4b9194952d73c34d4d58a5dc3aeddead130b5f0e.diff
LOG: [GlobalIsel] Combine selects with constants (#76089)
A first small step toward combining selects: fold G_SELECT with constant arms into zext/sext, add, shift, and or/and sequences, and re-implement the boolean select-to-logic folds under a single matchSelect entry point.
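For a scalar s1 condition with both arms constant, tryFoldSelectOfConstants performs (summarized from the transform comments in the patch below; the exact type and legality guards live in CombinerHelper.cpp):

  select Cond, 1, 0      --> zext Cond
  select Cond, -1, 0     --> sext Cond
  select Cond, 0, 1      --> zext (not Cond)
  select Cond, 0, -1     --> sext (not Cond)
  select Cond, C1, C1-1  --> add (zext Cond), C1-1
  select Cond, C1, C1+1  --> add (sext Cond), C1+1
  select Cond, Pow2, 0   --> (zext Cond) << log2(Pow2)
  select Cond, -1, C     --> or (sext Cond), C
  select Cond, C, -1     --> or (sext (not Cond)), C

For boolean (or fixed vector of boolean) operands, tryFoldBoolSelectToLogic re-implements the folds of the removed matchSelectToLogical:

  select Cond, Cond, F   --> or Cond, F
  select Cond, 1, F      --> or Cond, F
  select Cond, T, Cond   --> and Cond, T
  select Cond, T, 0      --> and Cond, T
  select Cond, T, 1      --> or (not Cond), T
  select Cond, 0, F      --> and (not Cond), F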
Added:
Modified:
llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
llvm/include/llvm/Target/GlobalISel/Combine.td
llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir
llvm/test/CodeGen/AArch64/andcompare.ll
llvm/test/CodeGen/AArch64/arm64-ccmp.ll
llvm/test/CodeGen/AArch64/call-rv-marker.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fold-binop-into-select.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.ll
llvm/test/CodeGen/AMDGPU/fdiv_flags.f32.ll
llvm/test/CodeGen/AMDGPU/fptrunc.ll
llvm/test/CodeGen/AMDGPU/fsqrt.f32.ll
llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.inverse.ballot.i64.ll
llvm/test/CodeGen/AMDGPU/rsq.f64.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index e7debc652a0a8b..dcc1a4580b14a2 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -769,9 +769,6 @@ class CombinerHelper {
bool matchCombineFSubFpExtFNegFMulToFMadOrFMA(MachineInstr &MI,
BuildFnTy &MatchInfo);
- /// Fold boolean selects to logical operations.
- bool matchSelectToLogical(MachineInstr &MI, BuildFnTy &MatchInfo);
-
bool matchCombineFMinMaxNaN(MachineInstr &MI, unsigned &Info);
/// Transform G_ADD(x, G_SUB(y, x)) to y.
@@ -814,6 +811,9 @@ class CombinerHelper {
// Given a binop \p MI, commute operands 1 and 2.
void applyCommuteBinOpOperands(MachineInstr &MI);
+ /// Combine selects.
+ bool matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo);
+
private:
/// Checks for legality of an indexed variant of \p LdSt.
bool isIndexedLoadStoreLegal(GLoadStore &LdSt) const;
@@ -904,6 +904,18 @@ class CombinerHelper {
/// select (fcmp uge x, 1.0) 1.0, x -> fminnm x, 1.0
bool matchFPSelectToMinMax(Register Dst, Register Cond, Register TrueVal,
Register FalseVal, BuildFnTy &MatchInfo);
+
+ /// Try to fold selects to logical operations.
+ bool tryFoldBoolSelectToLogic(GSelect *Select, BuildFnTy &MatchInfo);
+
+ bool tryFoldSelectOfConstants(GSelect *Select, BuildFnTy &MatchInfo);
+
+ bool isOneOrOneSplat(Register Src, bool AllowUndefs);
+ bool isZeroOrZeroSplat(Register Src, bool AllowUndefs);
+ bool isConstantSplatVector(Register Src, int64_t SplatValue,
+ bool AllowUndefs);
+
+ std::optional<APInt> getConstantOrConstantSplatVector(Register Src);
};
} // namespace llvm
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 77db371adaf776..6bda80681432a0 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -437,13 +437,6 @@ def select_constant_cmp: GICombineRule<
(apply [{ Helper.replaceSingleDefInstWithOperand(*${root}, ${matchinfo}); }])
>;
-def select_to_logical : GICombineRule<
- (defs root:$root, build_fn_matchinfo:$matchinfo),
- (match (wip_match_opcode G_SELECT):$root,
- [{ return Helper.matchSelectToLogical(*${root}, ${matchinfo}); }]),
- (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])
->;
-
// Fold (C op x) -> (x op C)
// TODO: handle more isCommutable opcodes
// TODO: handle compares (currently not marked as isCommutable)
@@ -1242,6 +1235,12 @@ def select_to_minmax: GICombineRule<
[{ return Helper.matchSimplifySelectToMinMax(*${root}, ${info}); }]),
(apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>;
+def match_selects : GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$matchinfo),
+ (match (wip_match_opcode G_SELECT):$root,
+ [{ return Helper.matchSelect(*${root}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;
+
// FIXME: These should use the custom predicate feature once it lands.
def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
undef_to_negative_one,
@@ -1282,7 +1281,7 @@ def width_reduction_combines : GICombineGroup<[reduce_shl_of_extend,
def phi_combines : GICombineGroup<[extend_through_phis]>;
def select_combines : GICombineGroup<[select_undef_cmp, select_constant_cmp,
- select_to_logical]>;
+ match_selects]>;
def trivial_combines : GICombineGroup<[copy_prop, mul_to_shl, add_p2i_to_ptradd,
mul_by_neg_one, idempotent_prop]>;
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 91a64d59e154df..8b15bdb0aca30b 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -5940,62 +5940,6 @@ bool CombinerHelper::matchCombineFSubFpExtFNegFMulToFMadOrFMA(
return false;
}
-bool CombinerHelper::matchSelectToLogical(MachineInstr &MI,
- BuildFnTy &MatchInfo) {
- GSelect &Sel = cast<GSelect>(MI);
- Register DstReg = Sel.getReg(0);
- Register Cond = Sel.getCondReg();
- Register TrueReg = Sel.getTrueReg();
- Register FalseReg = Sel.getFalseReg();
-
- auto *TrueDef = getDefIgnoringCopies(TrueReg, MRI);
- auto *FalseDef = getDefIgnoringCopies(FalseReg, MRI);
-
- const LLT CondTy = MRI.getType(Cond);
- const LLT OpTy = MRI.getType(TrueReg);
- if (CondTy != OpTy || OpTy.getScalarSizeInBits() != 1)
- return false;
-
- // We have a boolean select.
-
- // select Cond, Cond, F --> or Cond, F
- // select Cond, 1, F --> or Cond, F
- auto MaybeCstTrue = isConstantOrConstantSplatVector(*TrueDef, MRI);
- if (Cond == TrueReg || (MaybeCstTrue && MaybeCstTrue->isOne())) {
- MatchInfo = [=](MachineIRBuilder &MIB) {
- MIB.buildOr(DstReg, Cond, FalseReg);
- };
- return true;
- }
-
- // select Cond, T, Cond --> and Cond, T
- // select Cond, T, 0 --> and Cond, T
- auto MaybeCstFalse = isConstantOrConstantSplatVector(*FalseDef, MRI);
- if (Cond == FalseReg || (MaybeCstFalse && MaybeCstFalse->isZero())) {
- MatchInfo = [=](MachineIRBuilder &MIB) {
- MIB.buildAnd(DstReg, Cond, TrueReg);
- };
- return true;
- }
-
- // select Cond, T, 1 --> or (not Cond), T
- if (MaybeCstFalse && MaybeCstFalse->isOne()) {
- MatchInfo = [=](MachineIRBuilder &MIB) {
- MIB.buildOr(DstReg, MIB.buildNot(OpTy, Cond), TrueReg);
- };
- return true;
- }
-
- // select Cond, 0, F --> and (not Cond), F
- if (MaybeCstTrue && MaybeCstTrue->isZero()) {
- MatchInfo = [=](MachineIRBuilder &MIB) {
- MIB.buildAnd(DstReg, MIB.buildNot(OpTy, Cond), FalseReg);
- };
- return true;
- }
- return false;
-}
-
bool CombinerHelper::matchCombineFMinMaxNaN(MachineInstr &MI,
unsigned &IdxToPropagate) {
bool PropagateNaN;
@@ -6318,3 +6262,300 @@ void CombinerHelper::applyCommuteBinOpOperands(MachineInstr &MI) {
MI.getOperand(2).setReg(LHSReg);
Observer.changedInstr(MI);
}
+
+bool CombinerHelper::isOneOrOneSplat(Register Src, bool AllowUndefs) {
+ LLT SrcTy = MRI.getType(Src);
+ if (SrcTy.isFixedVector())
+ return isConstantSplatVector(Src, 1, AllowUndefs);
+ if (SrcTy.isScalar()) {
+ if (AllowUndefs && getOpcodeDef<GImplicitDef>(Src, MRI) != nullptr)
+ return true;
+ auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
+ return IConstant && IConstant->Value == 1;
+ }
+ return false; // scalable vector
+}
+
+bool CombinerHelper::isZeroOrZeroSplat(Register Src, bool AllowUndefs) {
+ LLT SrcTy = MRI.getType(Src);
+ if (SrcTy.isFixedVector())
+ return isConstantSplatVector(Src, 0, AllowUndefs);
+ if (SrcTy.isScalar()) {
+ if (AllowUndefs && getOpcodeDef<GImplicitDef>(Src, MRI) != nullptr)
+ return true;
+ auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
+ return IConstant && IConstant->Value == 0;
+ }
+ return false; // scalable vector
+}
+
+// Ignores COPYs during conformance checks.
+// FIXME scalable vectors.
+bool CombinerHelper::isConstantSplatVector(Register Src, int64_t SplatValue,
+ bool AllowUndefs) {
+ GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
+ if (!BuildVector)
+ return false;
+ unsigned NumSources = BuildVector->getNumSources();
+
+ for (unsigned I = 0; I < NumSources; ++I) {
+ GImplicitDef *ImplicitDef =
+ getOpcodeDef<GImplicitDef>(BuildVector->getSourceReg(I), MRI);
+ if (ImplicitDef && AllowUndefs)
+ continue;
+ if (ImplicitDef && !AllowUndefs)
+ return false;
+ std::optional<ValueAndVReg> IConstant =
+ getIConstantVRegValWithLookThrough(BuildVector->getSourceReg(I), MRI);
+ if (IConstant && IConstant->Value == SplatValue)
+ continue;
+ return false;
+ }
+ return true;
+}
+
+// Ignores COPYs during lookups.
+// FIXME scalable vectors
+std::optional<APInt>
+CombinerHelper::getConstantOrConstantSplatVector(Register Src) {
+ auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
+ if (IConstant)
+ return IConstant->Value;
+
+ GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
+ if (!BuildVector)
+ return std::nullopt;
+ unsigned NumSources = BuildVector->getNumSources();
+
+ std::optional<APInt> Value = std::nullopt;
+ for (unsigned I = 0; I < NumSources; ++I) {
+ std::optional<ValueAndVReg> IConstant =
+ getIConstantVRegValWithLookThrough(BuildVector->getSourceReg(I), MRI);
+ if (!IConstant)
+ return std::nullopt;
+ if (!Value)
+ Value = IConstant->Value;
+ else if (*Value != IConstant->Value)
+ return std::nullopt;
+ }
+ return Value;
+}
+
+// TODO: use knownbits to determine zeros
+bool CombinerHelper::tryFoldSelectOfConstants(GSelect *Select,
+ BuildFnTy &MatchInfo) {
+ uint32_t Flags = Select->getFlags();
+ Register Dest = Select->getReg(0);
+ Register Cond = Select->getCondReg();
+ Register True = Select->getTrueReg();
+ Register False = Select->getFalseReg();
+ LLT CondTy = MRI.getType(Select->getCondReg());
+ LLT TrueTy = MRI.getType(Select->getTrueReg());
+
+ // We only do this combine for scalar boolean conditions.
+ if (CondTy != LLT::scalar(1))
+ return false;
+
+ // Both are scalars.
+ std::optional<ValueAndVReg> TrueOpt =
+ getIConstantVRegValWithLookThrough(True, MRI);
+ std::optional<ValueAndVReg> FalseOpt =
+ getIConstantVRegValWithLookThrough(False, MRI);
+
+ if (!TrueOpt || !FalseOpt)
+ return false;
+
+ APInt TrueValue = TrueOpt->Value;
+ APInt FalseValue = FalseOpt->Value;
+
+ // select Cond, 1, 0 --> zext (Cond)
+ if (TrueValue.isOne() && FalseValue.isZero()) {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.setInstrAndDebugLoc(*Select);
+ B.buildZExtOrTrunc(Dest, Cond);
+ };
+ return true;
+ }
+
+ // select Cond, -1, 0 --> sext (Cond)
+ if (TrueValue.isAllOnes() && FalseValue.isZero()) {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.setInstrAndDebugLoc(*Select);
+ B.buildSExtOrTrunc(Dest, Cond);
+ };
+ return true;
+ }
+
+ // select Cond, 0, 1 --> zext (!Cond)
+ if (TrueValue.isZero() && FalseValue.isOne()) {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.setInstrAndDebugLoc(*Select);
+ Register Inner = MRI.createGenericVirtualRegister(CondTy);
+ B.buildNot(Inner, Cond);
+ B.buildZExtOrTrunc(Dest, Inner);
+ };
+ return true;
+ }
+
+ // select Cond, 0, -1 --> sext (!Cond)
+ if (TrueValue.isZero() && FalseValue.isAllOnes()) {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.setInstrAndDebugLoc(*Select);
+ Register Inner = MRI.createGenericVirtualRegister(CondTy);
+ B.buildNot(Inner, Cond);
+ B.buildSExtOrTrunc(Dest, Inner);
+ };
+ return true;
+ }
+
+ // select Cond, C1, C1-1 --> add (zext Cond), C1-1
+ if (TrueValue - 1 == FalseValue) {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.setInstrAndDebugLoc(*Select);
+ Register Inner = MRI.createGenericVirtualRegister(TrueTy);
+ B.buildZExtOrTrunc(Inner, Cond);
+ B.buildAdd(Dest, Inner, False);
+ };
+ return true;
+ }
+
+ // select Cond, C1, C1+1 --> add (sext Cond), C1+1
+ if (TrueValue + 1 == FalseValue) {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.setInstrAndDebugLoc(*Select);
+ Register Inner = MRI.createGenericVirtualRegister(TrueTy);
+ B.buildSExtOrTrunc(Inner, Cond);
+ B.buildAdd(Dest, Inner, False);
+ };
+ return true;
+ }
+
+ // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
+ if (TrueValue.isPowerOf2() && FalseValue.isZero()) {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.setInstrAndDebugLoc(*Select);
+ Register Inner = MRI.createGenericVirtualRegister(TrueTy);
+ B.buildZExtOrTrunc(Inner, Cond);
+ // The shift amount must be scalar.
+ LLT ShiftTy = TrueTy.isVector() ? TrueTy.getElementType() : TrueTy;
+ auto ShAmtC = B.buildConstant(ShiftTy, TrueValue.exactLogBase2());
+ B.buildShl(Dest, Inner, ShAmtC, Flags);
+ };
+ return true;
+ }
+ // select Cond, -1, C --> or (sext Cond), C
+ if (TrueValue.isAllOnes()) {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.setInstrAndDebugLoc(*Select);
+ Register Inner = MRI.createGenericVirtualRegister(TrueTy);
+ B.buildSExtOrTrunc(Inner, Cond);
+ B.buildOr(Dest, Inner, False, Flags);
+ };
+ return true;
+ }
+
+ // select Cond, C, -1 --> or (sext (not Cond)), C
+ if (FalseValue.isAllOnes()) {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.setInstrAndDebugLoc(*Select);
+ Register Not = MRI.createGenericVirtualRegister(CondTy);
+ B.buildNot(Not, Cond);
+ Register Inner = MRI.createGenericVirtualRegister(TrueTy);
+ B.buildSExtOrTrunc(Inner, Not);
+ B.buildOr(Dest, Inner, True, Flags);
+ };
+ return true;
+ }
+
+ return false;
+}
+
+// TODO: use knownbits to determine zeros
+bool CombinerHelper::tryFoldBoolSelectToLogic(GSelect *Select,
+ BuildFnTy &MatchInfo) {
+ uint32_t Flags = Select->getFlags();
+ Register DstReg = Select->getReg(0);
+ Register Cond = Select->getCondReg();
+ Register True = Select->getTrueReg();
+ Register False = Select->getFalseReg();
+ LLT CondTy = MRI.getType(Select->getCondReg());
+ LLT TrueTy = MRI.getType(Select->getTrueReg());
+
+ // Boolean or fixed vector of booleans.
+ if (CondTy.isScalableVector() ||
+ (CondTy.isFixedVector() &&
+ CondTy.getElementType().getScalarSizeInBits() != 1) ||
+ CondTy.getScalarSizeInBits() != 1)
+ return false;
+
+ if (CondTy != TrueTy)
+ return false;
+
+ // select Cond, Cond, F --> or Cond, F
+ // select Cond, 1, F --> or Cond, F
+ if ((Cond == True) || isOneOrOneSplat(True, /* AllowUndefs */ true)) {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.setInstrAndDebugLoc(*Select);
+ Register Ext = MRI.createGenericVirtualRegister(TrueTy);
+ B.buildZExtOrTrunc(Ext, Cond);
+ B.buildOr(DstReg, Ext, False, Flags);
+ };
+ return true;
+ }
+
+ // select Cond, T, Cond --> and Cond, T
+ // select Cond, T, 0 --> and Cond, T
+ if ((Cond == False) || isZeroOrZeroSplat(False, /* AllowUndefs */ true)) {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.setInstrAndDebugLoc(*Select);
+ Register Ext = MRI.createGenericVirtualRegister(TrueTy);
+ B.buildZExtOrTrunc(Ext, Cond);
+ B.buildAnd(DstReg, Ext, True);
+ };
+ return true;
+ }
+
+ // select Cond, T, 1 --> or (not Cond), T
+ if (isOneOrOneSplat(False, /* AllowUndefs */ true)) {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.setInstrAndDebugLoc(*Select);
+ // First the not.
+ Register Inner = MRI.createGenericVirtualRegister(CondTy);
+ B.buildNot(Inner, Cond);
+ // Then an ext to match the destination register.
+ Register Ext = MRI.createGenericVirtualRegister(TrueTy);
+ B.buildZExtOrTrunc(Ext, Inner);
+ B.buildOr(DstReg, Ext, True, Flags);
+ };
+ return true;
+ }
+
+ // select Cond, 0, F --> and (not Cond), F
+ if (isZeroOrZeroSplat(True, /* AllowUndefs */ true)) {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.setInstrAndDebugLoc(*Select);
+ // First the not.
+ Register Inner = MRI.createGenericVirtualRegister(CondTy);
+ B.buildNot(Inner, Cond);
+ // Then an ext to match the destination register.
+ Register Ext = MRI.createGenericVirtualRegister(TrueTy);
+ B.buildZExtOrTrunc(Ext, Inner);
+ B.buildAnd(DstReg, Ext, False);
+ };
+ return true;
+ }
+
+ return false;
+}
+
+bool CombinerHelper::matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo) {
+ GSelect *Select = cast<GSelect>(&MI);
+
+ if (tryFoldSelectOfConstants(Select, MatchInfo))
+ return true;
+
+ if (tryFoldBoolSelectToLogic(Select, MatchInfo))
+ return true;
+
+ return false;
+}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir
index 81d38a5b080470..be2de620fa456c 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir
@@ -298,3 +298,249 @@ body: |
%ext:_(s32) = G_ANYEXT %sel
$w0 = COPY %ext(s32)
...
+---
+# select cond, 1, 0 --> zext(Cond)
+name: select_cond_1_0_to_zext_cond
+body: |
+ bb.1:
+ liveins: $x0, $x1, $x2
+ ; CHECK-LABEL: name: select_cond_1_0_to_zext_cond
+ ; CHECK: liveins: $x0, $x1, $x2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+ ; CHECK-NEXT: %c:_(s1) = G_TRUNC [[COPY]](s64)
+ ; CHECK-NEXT: %ext:_(s32) = G_ANYEXT %c(s1)
+ ; CHECK-NEXT: $w0 = COPY %ext(s32)
+ %0:_(s64) = COPY $x0
+ %1:_(s64) = COPY $x1
+ %2:_(s64) = COPY $x2
+ %c:_(s1) = G_TRUNC %0
+ %t:_(s1) = G_TRUNC %1
+ %f:_(s1) = G_TRUNC %2
+ %zero:_(s1) = G_CONSTANT i1 0
+ %one:_(s1) = G_CONSTANT i1 1
+ %sel:_(s1) = G_SELECT %c, %one, %zero
+ %ext:_(s32) = G_ANYEXT %sel
+ $w0 = COPY %ext(s32)
+...
+---
+# select cond, 0, 1 --> zext(!Cond)
+name: select_cond_0_1_to_sext_not_cond
+body: |
+ bb.1:
+ liveins: $x0, $x1, $x2
+ ; CHECK-LABEL: name: select_cond_0_1_to_sext_not_cond
+ ; CHECK: liveins: $x0, $x1, $x2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+ ; CHECK-NEXT: %c:_(s1) = G_TRUNC [[COPY]](s64)
+ ; CHECK-NEXT: %one:_(s1) = G_CONSTANT i1 true
+ ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR %c, %one
+ ; CHECK-NEXT: %ext:_(s32) = G_ANYEXT [[XOR]](s1)
+ ; CHECK-NEXT: $w0 = COPY %ext(s32)
+ %0:_(s64) = COPY $x0
+ %1:_(s64) = COPY $x1
+ %2:_(s64) = COPY $x2
+ %c:_(s1) = G_TRUNC %0
+ %t:_(s1) = G_TRUNC %1
+ %f:_(s1) = G_TRUNC %2
+ %zero:_(s1) = G_CONSTANT i1 0
+ %one:_(s1) = G_CONSTANT i1 1
+ %sel:_(s1) = G_SELECT %c, %zero, %one
+ %ext:_(s32) = G_ANYEXT %sel
+ $w0 = COPY %ext(s32)
+...
+---
+# select cond, 2, 1 --> add (zext Cond), false
+name: select_cond_2_1_to_and_zext_cond_false
+body: |
+ bb.1:
+ liveins: $x0, $x1, $x2
+ ; CHECK-LABEL: name: select_cond_2_1_to_and_zext_cond_false
+ ; CHECK: liveins: $x0, $x1, $x2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+ ; CHECK-NEXT: %c:_(s1) = G_TRUNC [[COPY]](s64)
+ ; CHECK-NEXT: %one:_(s8) = G_CONSTANT i8 101
+ ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s8) = G_ZEXT %c(s1)
+ ; CHECK-NEXT: %sel:_(s8) = G_ADD [[ZEXT]], %one
+ ; CHECK-NEXT: %ext:_(s32) = G_ANYEXT %sel(s8)
+ ; CHECK-NEXT: $w0 = COPY %ext(s32)
+ %0:_(s64) = COPY $x0
+ %1:_(s64) = COPY $x1
+ %2:_(s64) = COPY $x2
+ %c:_(s1) = G_TRUNC %0
+ %t:_(s1) = G_TRUNC %1
+ %f:_(s1) = G_TRUNC %2
+ %two:_(s8) = G_CONSTANT i8 102
+ %one:_(s8) = G_CONSTANT i8 101
+ %sel:_(s8) = G_SELECT %c, %two, %one
+ %ext:_(s32) = G_ANYEXT %sel
+ $w0 = COPY %ext(s32)
+...
+---
+# select cond, 1, 2 --> add (sext Cond), false
+name: select_cond_1_2_to_and_sext_cond_false
+body: |
+ bb.1:
+ liveins: $x0, $x1, $x2
+ ; CHECK-LABEL: name: select_cond_1_2_to_and_sext_cond_false
+ ; CHECK: liveins: $x0, $x1, $x2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+ ; CHECK-NEXT: %c:_(s1) = G_TRUNC [[COPY]](s64)
+ ; CHECK-NEXT: %one:_(s8) = G_CONSTANT i8 102
+ ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s8) = G_SEXT %c(s1)
+ ; CHECK-NEXT: %sel:_(s8) = G_ADD [[SEXT]], %one
+ ; CHECK-NEXT: %ext:_(s32) = G_ANYEXT %sel(s8)
+ ; CHECK-NEXT: $w0 = COPY %ext(s32)
+ %0:_(s64) = COPY $x0
+ %1:_(s64) = COPY $x1
+ %2:_(s64) = COPY $x2
+ %c:_(s1) = G_TRUNC %0
+ %t:_(s1) = G_TRUNC %1
+ %f:_(s1) = G_TRUNC %2
+ %two:_(s8) = G_CONSTANT i8 101
+ %one:_(s8) = G_CONSTANT i8 102
+ %sel:_(s8) = G_SELECT %c, %two, %one
+ %ext:_(s32) = G_ANYEXT %sel
+ $w0 = COPY %ext(s32)
+...
+---
+# select cond, 64, 0 --> (zext Cond) << log2(Pow2)
+name: select_cond_64_0_to_shift
+body: |
+ bb.1:
+ liveins: $x0, $x1, $x2
+ ; CHECK-LABEL: name: select_cond_64_0_to_shift
+ ; CHECK: liveins: $x0, $x1, $x2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+ ; CHECK-NEXT: %c:_(s1) = G_TRUNC [[COPY]](s64)
+ ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s8) = G_ZEXT %c(s1)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 6
+ ; CHECK-NEXT: %sel:_(s8) = G_SHL [[ZEXT]], [[C]](s8)
+ ; CHECK-NEXT: %ext:_(s32) = G_ANYEXT %sel(s8)
+ ; CHECK-NEXT: $w0 = COPY %ext(s32)
+ %0:_(s64) = COPY $x0
+ %1:_(s64) = COPY $x1
+ %2:_(s64) = COPY $x2
+ %c:_(s1) = G_TRUNC %0
+ %t:_(s1) = G_TRUNC %1
+ %f:_(s1) = G_TRUNC %2
+ %two:_(s8) = G_CONSTANT i8 64
+ %one:_(s8) = G_CONSTANT i8 0
+ %sel:_(s8) = G_SELECT %c, %two, %one
+ %ext:_(s32) = G_ANYEXT %sel
+ $w0 = COPY %ext(s32)
+...
+---
+# select cond, -1, 0 --> sext Cond
+name: select_cond_minus_1_0_to_sext_cond
+body: |
+ bb.1:
+ liveins: $x0, $x1, $x2
+ ; CHECK-LABEL: name: select_cond_minus_1_0_to_sext_cond
+ ; CHECK: liveins: $x0, $x1, $x2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+ ; CHECK-NEXT: %c:_(s1) = G_TRUNC [[COPY]](s64)
+ ; CHECK-NEXT: %ext:_(s32) = G_SEXT %c(s1)
+ ; CHECK-NEXT: $w0 = COPY %ext(s32)
+ %0:_(s64) = COPY $x0
+ %1:_(s64) = COPY $x1
+ %2:_(s64) = COPY $x2
+ %c:_(s1) = G_TRUNC %0
+ %t:_(s1) = G_TRUNC %1
+ %f:_(s1) = G_TRUNC %2
+ %two:_(s8) = G_CONSTANT i8 255
+ %one:_(s8) = G_CONSTANT i8 0
+ %sel:_(s8) = G_SELECT %c, %two, %one
+ %ext:_(s32) = G_ANYEXT %sel
+ $w0 = COPY %ext(s32)
+...
+---
+# select cond, 0, -1 --> sext (!Cond)
+name: select_cond_0_minus_1_to_sext_not_cond
+body: |
+ bb.1:
+ liveins: $x0, $x1, $x2
+ ; CHECK-LABEL: name: select_cond_0_minus_1_to_sext_not_cond
+ ; CHECK: liveins: $x0, $x1, $x2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+ ; CHECK-NEXT: %c:_(s1) = G_TRUNC [[COPY]](s64)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+ ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR %c, [[C]]
+ ; CHECK-NEXT: %ext:_(s32) = G_SEXT [[XOR]](s1)
+ ; CHECK-NEXT: $w0 = COPY %ext(s32)
+ %0:_(s64) = COPY $x0
+ %1:_(s64) = COPY $x1
+ %2:_(s64) = COPY $x2
+ %c:_(s1) = G_TRUNC %0
+ %t:_(s1) = G_TRUNC %1
+ %f:_(s1) = G_TRUNC %2
+ %two:_(s8) = G_CONSTANT i8 0
+ %one:_(s8) = G_CONSTANT i8 255
+ %sel:_(s8) = G_SELECT %c, %two, %one
+ %ext:_(s32) = G_ANYEXT %sel
+ $w0 = COPY %ext(s32)
+...
+---
+# select cond, -1, 101 --> or (sext Cond), 101
+name: select_cond_minus_1_101_to_or_sext_cond_101
+body: |
+ bb.1:
+ liveins: $x0, $x1, $x2
+ ; CHECK-LABEL: name: select_cond_minus_1_101_to_or_sext_cond_101
+ ; CHECK: liveins: $x0, $x1, $x2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+ ; CHECK-NEXT: %c:_(s1) = G_TRUNC [[COPY]](s64)
+ ; CHECK-NEXT: %one:_(s8) = G_CONSTANT i8 101
+ ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s8) = G_SEXT %c(s1)
+ ; CHECK-NEXT: %sel:_(s8) = G_OR [[SEXT]], %one
+ ; CHECK-NEXT: %ext:_(s32) = G_ANYEXT %sel(s8)
+ ; CHECK-NEXT: $w0 = COPY %ext(s32)
+ %0:_(s64) = COPY $x0
+ %1:_(s64) = COPY $x1
+ %2:_(s64) = COPY $x2
+ %c:_(s1) = G_TRUNC %0
+ %t:_(s1) = G_TRUNC %1
+ %f:_(s1) = G_TRUNC %2
+ %two:_(s8) = G_CONSTANT i8 255
+ %one:_(s8) = G_CONSTANT i8 101
+ %sel:_(s8) = G_SELECT %c, %two, %one
+ %ext:_(s32) = G_ANYEXT %sel
+ $w0 = COPY %ext(s32)
+...
+---
+# select cond, 101, -1 --> or (sext (not Cond)), 101
+name: select_cond_101_minus_1_to_or_sext_not_cond_101
+body: |
+ bb.1:
+ liveins: $x0, $x1, $x2
+ ; CHECK-LABEL: name: select_cond_101_minus_1_to_or_sext_not_cond_101
+ ; CHECK: liveins: $x0, $x1, $x2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+ ; CHECK-NEXT: %c:_(s1) = G_TRUNC [[COPY]](s64)
+ ; CHECK-NEXT: %two:_(s8) = G_CONSTANT i8 101
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+ ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR %c, [[C]]
+ ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s8) = G_SEXT [[XOR]](s1)
+ ; CHECK-NEXT: %sel:_(s8) = G_OR [[SEXT]], %two
+ ; CHECK-NEXT: %ext:_(s32) = G_ANYEXT %sel(s8)
+ ; CHECK-NEXT: $w0 = COPY %ext(s32)
+ %0:_(s64) = COPY $x0
+ %1:_(s64) = COPY $x1
+ %2:_(s64) = COPY $x2
+ %c:_(s1) = G_TRUNC %0
+ %t:_(s1) = G_TRUNC %1
+ %f:_(s1) = G_TRUNC %2
+ %two:_(s8) = G_CONSTANT i8 101
+ %one:_(s8) = G_CONSTANT i8 255
+ %sel:_(s8) = G_SELECT %c, %two, %one
+ %ext:_(s32) = G_ANYEXT %sel
+ $w0 = COPY %ext(s32)
+...
diff --git a/llvm/test/CodeGen/AArch64/andcompare.ll b/llvm/test/CodeGen/AArch64/andcompare.ll
index 9a7fa04982990b..cbacd17c846d45 100644
--- a/llvm/test/CodeGen/AArch64/andcompare.ll
+++ b/llvm/test/CodeGen/AArch64/andcompare.ll
@@ -2451,7 +2451,7 @@ define i32 @cmp_to_ands3(i32 %num, i32 %a) {
;
; GISEL-LABEL: cmp_to_ands3:
; GISEL: // %bb.0:
-; GISEL-NEXT: mov w8, #23
+; GISEL-NEXT: mov w8, #23 // =0x17
; GISEL-NEXT: and w8, w0, w8
; GISEL-NEXT: cmp w8, #7
; GISEL-NEXT: csel w0, w1, wzr, hi
diff --git a/llvm/test/CodeGen/AArch64/arm64-ccmp.ll b/llvm/test/CodeGen/AArch64/arm64-ccmp.ll
index 821f6e403a2713..446526986b8837 100644
--- a/llvm/test/CodeGen/AArch64/arm64-ccmp.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-ccmp.ll
@@ -14,7 +14,7 @@ define i32 @single_same(i32 %a, i32 %b) nounwind ssp {
; CHECK-NEXT: bl _foo
; CHECK-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
; CHECK-NEXT: LBB0_2: ; %if.end
-; CHECK-NEXT: mov w0, #7
+; CHECK-NEXT: mov w0, #7 ; =0x7
; CHECK-NEXT: ret
entry:
%cmp = icmp eq i32 %a, 5
@@ -42,7 +42,7 @@ define i32 @single_different(i32 %a, i32 %b) nounwind ssp {
; SDISEL-NEXT: bl _foo
; SDISEL-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
; SDISEL-NEXT: LBB1_2: ; %if.end
-; SDISEL-NEXT: mov w0, #7
+; SDISEL-NEXT: mov w0, #7 ; =0x7
; SDISEL-NEXT: ret
;
; GISEL-LABEL: single_different:
@@ -55,7 +55,7 @@ define i32 @single_different(i32 %a, i32 %b) nounwind ssp {
; GISEL-NEXT: bl _foo
; GISEL-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
; GISEL-NEXT: LBB1_2: ; %if.end
-; GISEL-NEXT: mov w0, #7
+; GISEL-NEXT: mov w0, #7 ; =0x7
; GISEL-NEXT: ret
entry:
%cmp = icmp sle i32 %a, 5
@@ -88,7 +88,7 @@ define i32 @single_flagclobber(i32 %a, i32 %b) nounwind ssp {
; SDISEL-NEXT: bl _foo
; SDISEL-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
; SDISEL-NEXT: LBB2_3: ; %if.end
-; SDISEL-NEXT: mov w0, #7
+; SDISEL-NEXT: mov w0, #7 ; =0x7
; SDISEL-NEXT: ret
;
; GISEL-LABEL: single_flagclobber:
@@ -106,7 +106,7 @@ define i32 @single_flagclobber(i32 %a, i32 %b) nounwind ssp {
; GISEL-NEXT: bl _foo
; GISEL-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
; GISEL-NEXT: LBB2_3: ; %if.end
-; GISEL-NEXT: mov w0, #7
+; GISEL-NEXT: mov w0, #7 ; =0x7
; GISEL-NEXT: ret
entry:
%cmp = icmp eq i32 %a, 5
@@ -144,7 +144,7 @@ define i32 @single_flagclobber_tbz(i32 %a, i32 %b) nounwind ssp {
; CHECK-NEXT: bl _foo
; CHECK-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
; CHECK-NEXT: LBB3_3: ; %if.end
-; CHECK-NEXT: mov w0, #7
+; CHECK-NEXT: mov w0, #7 ; =0x7
; CHECK-NEXT: ret
entry:
%cmp = icmp eq i32 %a, 5
@@ -178,13 +178,13 @@ define i32 @speculate_division(i32 %a, i32 %b) nounwind ssp {
; SDISEL-NEXT: ccmp w8, #16, #0, ge
; SDISEL-NEXT: b.le LBB4_2
; SDISEL-NEXT: ; %bb.1: ; %if.end
-; SDISEL-NEXT: mov w0, #7
+; SDISEL-NEXT: mov w0, #7 ; =0x7
; SDISEL-NEXT: ret
; SDISEL-NEXT: LBB4_2: ; %if.then
; SDISEL-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
; SDISEL-NEXT: bl _foo
; SDISEL-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
-; SDISEL-NEXT: mov w0, #7
+; SDISEL-NEXT: mov w0, #7 ; =0x7
; SDISEL-NEXT: ret
;
; GISEL-LABEL: speculate_division:
@@ -194,13 +194,13 @@ define i32 @speculate_division(i32 %a, i32 %b) nounwind ssp {
; GISEL-NEXT: ccmp w8, #17, #0, gt
; GISEL-NEXT: b.lt LBB4_2
; GISEL-NEXT: ; %bb.1: ; %if.end
-; GISEL-NEXT: mov w0, #7
+; GISEL-NEXT: mov w0, #7 ; =0x7
; GISEL-NEXT: ret
; GISEL-NEXT: LBB4_2: ; %if.then
; GISEL-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
; GISEL-NEXT: bl _foo
; GISEL-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
-; GISEL-NEXT: mov w0, #7
+; GISEL-NEXT: mov w0, #7 ; =0x7
; GISEL-NEXT: ret
entry:
%cmp = icmp sgt i32 %a, 0
@@ -230,13 +230,13 @@ define i32 @single_fcmp(i32 %a, float %b) nounwind ssp {
; SDISEL-NEXT: fccmp s0, s1, #8, ge
; SDISEL-NEXT: b.ge LBB5_2
; SDISEL-NEXT: ; %bb.1: ; %if.end
-; SDISEL-NEXT: mov w0, #7
+; SDISEL-NEXT: mov w0, #7 ; =0x7
; SDISEL-NEXT: ret
; SDISEL-NEXT: LBB5_2: ; %if.then
; SDISEL-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
; SDISEL-NEXT: bl _foo
; SDISEL-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
-; SDISEL-NEXT: mov w0, #7
+; SDISEL-NEXT: mov w0, #7 ; =0x7
; SDISEL-NEXT: ret
;
; GISEL-LABEL: single_fcmp:
@@ -248,13 +248,13 @@ define i32 @single_fcmp(i32 %a, float %b) nounwind ssp {
; GISEL-NEXT: fccmp s0, s1, #8, gt
; GISEL-NEXT: b.ge LBB5_2
; GISEL-NEXT: ; %bb.1: ; %if.end
-; GISEL-NEXT: mov w0, #7
+; GISEL-NEXT: mov w0, #7 ; =0x7
; GISEL-NEXT: ret
; GISEL-NEXT: LBB5_2: ; %if.then
; GISEL-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
; GISEL-NEXT: bl _foo
; GISEL-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
-; GISEL-NEXT: mov w0, #7
+; GISEL-NEXT: mov w0, #7 ; =0x7
; GISEL-NEXT: ret
entry:
%cmp = icmp sgt i32 %a, 0
@@ -318,7 +318,7 @@ define i32 @cbz_head(i32 %a, i32 %b) nounwind ssp {
; CHECK-NEXT: bl _foo
; CHECK-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
; CHECK-NEXT: LBB7_2: ; %if.end
-; CHECK-NEXT: mov w0, #7
+; CHECK-NEXT: mov w0, #7 ; =0x7
; CHECK-NEXT: ret
entry:
%cmp = icmp eq i32 %a, 0
@@ -346,13 +346,13 @@ define i32 @immediate_range(i32 %a, i32 %b) nounwind ssp {
; CHECK-NEXT: cmp w1, #32
; CHECK-NEXT: b.eq LBB8_3
; CHECK-NEXT: ; %bb.2: ; %if.end
-; CHECK-NEXT: mov w0, #7
+; CHECK-NEXT: mov w0, #7 ; =0x7
; CHECK-NEXT: ret
; CHECK-NEXT: LBB8_3: ; %if.then
; CHECK-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
; CHECK-NEXT: bl _foo
; CHECK-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
-; CHECK-NEXT: mov w0, #7
+; CHECK-NEXT: mov w0, #7 ; =0x7
; CHECK-NEXT: ret
entry:
%cmp = icmp eq i32 %a, 5
@@ -380,7 +380,7 @@ define i32 @cbz_second(i32 %a, i32 %b) nounwind ssp {
; CHECK-NEXT: bl _foo
; CHECK-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
; CHECK-NEXT: LBB9_2: ; %if.end
-; CHECK-NEXT: mov w0, #7
+; CHECK-NEXT: mov w0, #7 ; =0x7
; CHECK-NEXT: ret
entry:
%cmp = icmp eq i32 %a, 0
@@ -408,7 +408,7 @@ define i32 @cbnz_second(i32 %a, i32 %b) nounwind ssp {
; CHECK-NEXT: bl _foo
; CHECK-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
; CHECK-NEXT: LBB10_2: ; %if.end
-; CHECK-NEXT: mov w0, #7
+; CHECK-NEXT: mov w0, #7 ; =0x7
; CHECK-NEXT: ret
entry:
%cmp = icmp eq i32 %a, 0
@@ -466,7 +466,7 @@ define i64 @select_and(i32 %w0, i32 %w1, i64 %x2, i64 %x3) {
;
; GISEL-LABEL: select_and:
; GISEL: ; %bb.0:
-; GISEL-NEXT: mov w8, #5
+; GISEL-NEXT: mov w8, #5 ; =0x5
; GISEL-NEXT: cmp w8, w1
; GISEL-NEXT: ccmp w0, w1, #0, ne
; GISEL-NEXT: csel x0, x2, x3, lt
@@ -488,7 +488,7 @@ define i64 @select_or(i32 %w0, i32 %w1, i64 %x2, i64 %x3) {
;
; GISEL-LABEL: select_or:
; GISEL: ; %bb.0:
-; GISEL-NEXT: mov w8, #5
+; GISEL-NEXT: mov w8, #5 ; =0x5
; GISEL-NEXT: cmp w8, w1
; GISEL-NEXT: ccmp w0, w1, #8, eq
; GISEL-NEXT: csel x0, x2, x3, lt
@@ -510,7 +510,7 @@ define float @select_or_float(i32 %w0, i32 %w1, float %x2, float %x3) {
;
; GISEL-LABEL: select_or_float:
; GISEL: ; %bb.0:
-; GISEL-NEXT: mov w8, #5
+; GISEL-NEXT: mov w8, #5 ; =0x5
; GISEL-NEXT: cmp w8, w1
; GISEL-NEXT: ccmp w0, w1, #8, eq
; GISEL-NEXT: fcsel s0, s0, s1, lt
@@ -528,17 +528,22 @@ define i64 @gccbug(i64 %x0, i64 %x1) {
; SDISEL-NEXT: cmp x0, #2
; SDISEL-NEXT: ccmp x0, #4, #4, ne
; SDISEL-NEXT: ccmp x1, #0, #0, eq
-; SDISEL-NEXT: mov w8, #1
+; SDISEL-NEXT: mov w8, #1 ; =0x1
; SDISEL-NEXT: cinc x0, x8, eq
; SDISEL-NEXT: ret
;
; GISEL-LABEL: gccbug:
; GISEL: ; %bb.0:
-; GISEL-NEXT: mov w8, #2
+; GISEL-NEXT: cmp x1, #0
+; GISEL-NEXT: cset w8, eq
; GISEL-NEXT: cmp x0, #2
-; GISEL-NEXT: ccmp x0, #4, #4, ne
-; GISEL-NEXT: ccmp x1, #0, #0, eq
-; GISEL-NEXT: csinc x0, x8, xzr, eq
+; GISEL-NEXT: cset w9, eq
+; GISEL-NEXT: cmp x0, #4
+; GISEL-NEXT: cset w10, eq
+; GISEL-NEXT: orr w9, w10, w9
+; GISEL-NEXT: and w8, w9, w8
+; GISEL-NEXT: and x8, x8, #0x1
+; GISEL-NEXT: add x0, x8, #1
; GISEL-NEXT: ret
%cmp0 = icmp eq i64 %x1, 0
%cmp1 = icmp eq i64 %x0, 2
@@ -592,7 +597,7 @@ define i32 @select_andor32(i32 %v1, i32 %v2, i32 %v3) {
; SDISEL-LABEL: select_andor32:
; SDISEL: ; %bb.0:
; SDISEL-NEXT: cmp w1, w2
-; SDISEL-NEXT: mov w8, #32
+; SDISEL-NEXT: mov w8, #32 ; =0x20
; SDISEL-NEXT: ccmp w0, w8, #4, lt
; SDISEL-NEXT: ccmp w0, w1, #0, eq
; SDISEL-NEXT: csel w0, w0, w1, eq
@@ -600,7 +605,7 @@ define i32 @select_andor32(i32 %v1, i32 %v2, i32 %v3) {
;
; GISEL-LABEL: select_andor32:
; GISEL: ; %bb.0:
-; GISEL-NEXT: mov w8, #32
+; GISEL-NEXT: mov w8, #32 ; =0x20
; GISEL-NEXT: cmp w1, w2
; GISEL-NEXT: ccmp w0, w8, #4, lt
; GISEL-NEXT: ccmp w0, w1, #0, eq
@@ -701,11 +706,11 @@ define i32 @select_noccmp3(i32 %v0, i32 %v1, i32 %v2) {
; SDISEL-NEXT: ccmp w0, #13, #0, ge
; SDISEL-NEXT: cset w8, gt
; SDISEL-NEXT: cmp w0, #22
-; SDISEL-NEXT: mov w9, #44
+; SDISEL-NEXT: mov w9, #44 ; =0x2c
; SDISEL-NEXT: ccmp w0, w9, #0, ge
; SDISEL-NEXT: csel w8, wzr, w8, le
; SDISEL-NEXT: cmp w0, #99
-; SDISEL-NEXT: mov w9, #77
+; SDISEL-NEXT: mov w9, #77 ; =0x4d
; SDISEL-NEXT: ccmp w0, w9, #4, ne
; SDISEL-NEXT: cset w9, eq
; SDISEL-NEXT: tst w8, w9
diff --git a/llvm/test/CodeGen/AArch64/call-rv-marker.ll b/llvm/test/CodeGen/AArch64/call-rv-marker.ll
index fc06809ad09fb6..de8f5bbfb484d6 100644
--- a/llvm/test/CodeGen/AArch64/call-rv-marker.ll
+++ b/llvm/test/CodeGen/AArch64/call-rv-marker.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -o - %s | FileCheck --check-prefix=SELDAG --check-prefix=CHECK %s
; RUN: llc -global-isel -o - %s | FileCheck --check-prefix=GISEL --check-prefix=CHECK %s
@@ -25,37 +26,93 @@ declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)
@fptr = dso_local global ptr null, align 8
define dso_local ptr @rv_marker_1_retain() {
-; CHECK-LABEL: _rv_marker_1_retain:
-; CHECK: bl _foo1
-; CHECK-NEXT: mov x29, x29
-; CHECK-NEXT: bl _objc_retainAutoreleasedReturnValue
+; SELDAG-LABEL: rv_marker_1_retain:
+; SELDAG: ; %bb.0: ; %entry
+; SELDAG-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; SELDAG-NEXT: .cfi_def_cfa_offset 16
+; SELDAG-NEXT: .cfi_offset w30, -8
+; SELDAG-NEXT: .cfi_offset w29, -16
+; SELDAG-NEXT: bl _foo1
+; SELDAG-NEXT: mov x29, x29
+; SELDAG-NEXT: bl _objc_retainAutoreleasedReturnValue
+; SELDAG-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; SELDAG-NEXT: ret
;
+; GISEL-LABEL: rv_marker_1_retain:
+; GISEL: ; %bb.0: ; %entry
+; GISEL-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; GISEL-NEXT: .cfi_def_cfa_offset 16
+; GISEL-NEXT: .cfi_offset w30, -8
+; GISEL-NEXT: .cfi_offset w29, -16
+; GISEL-NEXT: bl _foo1
+; GISEL-NEXT: mov x29, x29
+; GISEL-NEXT: bl _objc_retainAutoreleasedReturnValue
+; GISEL-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; GISEL-NEXT: ret
entry:
%call = call ptr @foo1() [ "clang.arc.attachedcall"(ptr @objc_retainAutoreleasedReturnValue) ]
ret ptr %call
}
define dso_local ptr @rv_marker_1_unsafeClaim() {
-; CHECK-LABEL: _rv_marker_1_unsafeClaim:
-; CHECK: bl _foo1
-; CHECK-NEXT: mov x29, x29
-; CHECK-NEXT: bl _objc_unsafeClaimAutoreleasedReturnValue
+; SELDAG-LABEL: rv_marker_1_unsafeClaim:
+; SELDAG: ; %bb.0: ; %entry
+; SELDAG-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; SELDAG-NEXT: .cfi_def_cfa_offset 16
+; SELDAG-NEXT: .cfi_offset w30, -8
+; SELDAG-NEXT: .cfi_offset w29, -16
+; SELDAG-NEXT: bl _foo1
+; SELDAG-NEXT: mov x29, x29
+; SELDAG-NEXT: bl _objc_unsafeClaimAutoreleasedReturnValue
+; SELDAG-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; SELDAG-NEXT: ret
;
+; GISEL-LABEL: rv_marker_1_unsafeClaim:
+; GISEL: ; %bb.0: ; %entry
+; GISEL-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; GISEL-NEXT: .cfi_def_cfa_offset 16
+; GISEL-NEXT: .cfi_offset w30, -8
+; GISEL-NEXT: .cfi_offset w29, -16
+; GISEL-NEXT: bl _foo1
+; GISEL-NEXT: mov x29, x29
+; GISEL-NEXT: bl _objc_unsafeClaimAutoreleasedReturnValue
+; GISEL-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; GISEL-NEXT: ret
entry:
%call = call ptr @foo1() [ "clang.arc.attachedcall"(ptr @objc_unsafeClaimAutoreleasedReturnValue) ]
ret ptr %call
}
define dso_local void @rv_marker_2_select(i32 %c) {
-; CHECK-LABEL: _rv_marker_2_select:
-; SELDAG: cinc w0, w8, eq
-; GISEL: csinc w0, w8, wzr, eq
-; CHECK-NEXT: bl _foo0
-; CHECK-NEXT: mov x29, x29
-; CHECK-NEXT: bl _objc_retainAutoreleasedReturnValue
-; CHECK-NEXT: ldp x29, x30, [sp], #16
-; CHECK-NEXT: b _foo2
+; SELDAG-LABEL: rv_marker_2_select:
+; SELDAG: ; %bb.0: ; %entry
+; SELDAG-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; SELDAG-NEXT: .cfi_def_cfa_offset 16
+; SELDAG-NEXT: .cfi_offset w30, -8
+; SELDAG-NEXT: .cfi_offset w29, -16
+; SELDAG-NEXT: mov w8, #1 ; =0x1
+; SELDAG-NEXT: cmp w0, #0
+; SELDAG-NEXT: cinc w0, w8, eq
+; SELDAG-NEXT: bl _foo0
+; SELDAG-NEXT: mov x29, x29
+; SELDAG-NEXT: bl _objc_retainAutoreleasedReturnValue
+; SELDAG-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; SELDAG-NEXT: b _foo2
;
+; GISEL-LABEL: rv_marker_2_select:
+; GISEL: ; %bb.0: ; %entry
+; GISEL-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; GISEL-NEXT: .cfi_def_cfa_offset 16
+; GISEL-NEXT: .cfi_offset w30, -8
+; GISEL-NEXT: .cfi_offset w29, -16
+; GISEL-NEXT: mov w8, #1 ; =0x1
+; GISEL-NEXT: cmp w0, #0
+; GISEL-NEXT: cinc w0, w8, eq
+; GISEL-NEXT: bl _foo0
+; GISEL-NEXT: mov x29, x29
+; GISEL-NEXT: bl _objc_retainAutoreleasedReturnValue
+; GISEL-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; GISEL-NEXT: b _foo2
entry:
%tobool.not = icmp eq i32 %c, 0
%.sink = select i1 %tobool.not, i32 2, i32 1
@@ -65,11 +122,121 @@ entry:
}
define dso_local void @rv_marker_3() personality ptr @__gxx_personality_v0 {
-; CHECK-LABEL: _rv_marker_3:
-; CHECK: bl _foo1
-; CHECK-NEXT: mov x29, x29
-; CHECK-NEXT: bl _objc_retainAutoreleasedReturnValue
+; SELDAG-LABEL: rv_marker_3:
+; SELDAG: Lfunc_begin0:
+; SELDAG-NEXT: .cfi_startproc
+; SELDAG-NEXT: .cfi_personality 155, ___gxx_personality_v0
+; SELDAG-NEXT: .cfi_lsda 16, Lexception0
+; SELDAG-NEXT: ; %bb.0: ; %entry
+; SELDAG-NEXT: stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill
+; SELDAG-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; SELDAG-NEXT: .cfi_def_cfa_offset 32
+; SELDAG-NEXT: .cfi_offset w30, -8
+; SELDAG-NEXT: .cfi_offset w29, -16
+; SELDAG-NEXT: .cfi_offset w19, -24
+; SELDAG-NEXT: .cfi_offset w20, -32
+; SELDAG-NEXT: bl _foo1
+; SELDAG-NEXT: mov x29, x29
+; SELDAG-NEXT: bl _objc_retainAutoreleasedReturnValue
+; SELDAG-NEXT: mov x19, x0
+; SELDAG-NEXT: Ltmp0:
+; SELDAG-NEXT: bl _objc_object
+; SELDAG-NEXT: Ltmp1:
+; SELDAG-NEXT: ; %bb.1: ; %invoke.cont
+; SELDAG-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; SELDAG-NEXT: mov x0, x19
+; SELDAG-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
+; SELDAG-NEXT: b _objc_release
+; SELDAG-NEXT: LBB3_2: ; %lpad
+; SELDAG-NEXT: Ltmp2:
+; SELDAG-NEXT: mov x20, x0
+; SELDAG-NEXT: mov x0, x19
+; SELDAG-NEXT: bl _objc_release
+; SELDAG-NEXT: mov x0, x20
+; SELDAG-NEXT: bl __Unwind_Resume
+; SELDAG-NEXT: Lfunc_end0:
+; SELDAG-NEXT: .cfi_endproc
+; SELDAG-NEXT: .section __TEXT,__gcc_except_tab
+; SELDAG-NEXT: .p2align 2, 0x0
+; SELDAG-NEXT: GCC_except_table3:
+; SELDAG-NEXT: Lexception0:
+; SELDAG-NEXT: .byte 255 ; @LPStart Encoding = omit
+; SELDAG-NEXT: .byte 255 ; @TType Encoding = omit
+; SELDAG-NEXT: .byte 1 ; Call site Encoding = uleb128
+; SELDAG-NEXT: .uleb128 Lcst_end0-Lcst_begin0
+; SELDAG-NEXT: Lcst_begin0:
+; SELDAG-NEXT: .uleb128 Lfunc_begin0-Lfunc_begin0 ; >> Call Site 1 <<
+; SELDAG-NEXT: .uleb128 Ltmp0-Lfunc_begin0 ; Call between Lfunc_begin0 and Ltmp0
+; SELDAG-NEXT: .byte 0 ; has no landing pad
+; SELDAG-NEXT: .byte 0 ; On action: cleanup
+; SELDAG-NEXT: .uleb128 Ltmp0-Lfunc_begin0 ; >> Call Site 2 <<
+; SELDAG-NEXT: .uleb128 Ltmp1-Ltmp0 ; Call between Ltmp0 and Ltmp1
+; SELDAG-NEXT: .uleb128 Ltmp2-Lfunc_begin0 ; jumps to Ltmp2
+; SELDAG-NEXT: .byte 0 ; On action: cleanup
+; SELDAG-NEXT: .uleb128 Ltmp1-Lfunc_begin0 ; >> Call Site 3 <<
+; SELDAG-NEXT: .uleb128 Lfunc_end0-Ltmp1 ; Call between Ltmp1 and Lfunc_end0
+; SELDAG-NEXT: .byte 0 ; has no landing pad
+; SELDAG-NEXT: .byte 0 ; On action: cleanup
+; SELDAG-NEXT: Lcst_end0:
+; SELDAG-NEXT: .p2align 2, 0x0
;
+; GISEL-LABEL: rv_marker_3:
+; GISEL: Lfunc_begin0:
+; GISEL-NEXT: .cfi_startproc
+; GISEL-NEXT: .cfi_personality 155, ___gxx_personality_v0
+; GISEL-NEXT: .cfi_lsda 16, Lexception0
+; GISEL-NEXT: ; %bb.0: ; %entry
+; GISEL-NEXT: stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill
+; GISEL-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; GISEL-NEXT: .cfi_def_cfa_offset 32
+; GISEL-NEXT: .cfi_offset w30, -8
+; GISEL-NEXT: .cfi_offset w29, -16
+; GISEL-NEXT: .cfi_offset w19, -24
+; GISEL-NEXT: .cfi_offset w20, -32
+; GISEL-NEXT: bl _foo1
+; GISEL-NEXT: mov x29, x29
+; GISEL-NEXT: bl _objc_retainAutoreleasedReturnValue
+; GISEL-NEXT: mov x19, x0
+; GISEL-NEXT: Ltmp0:
+; GISEL-NEXT: bl _objc_object
+; GISEL-NEXT: Ltmp1:
+; GISEL-NEXT: ; %bb.1: ; %invoke.cont
+; GISEL-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; GISEL-NEXT: mov x0, x19
+; GISEL-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
+; GISEL-NEXT: b _objc_release
+; GISEL-NEXT: LBB3_2: ; %lpad
+; GISEL-NEXT: Ltmp2:
+; GISEL-NEXT: mov x20, x0
+; GISEL-NEXT: mov x0, x19
+; GISEL-NEXT: bl _objc_release
+; GISEL-NEXT: mov x0, x20
+; GISEL-NEXT: bl __Unwind_Resume
+; GISEL-NEXT: Lfunc_end0:
+; GISEL-NEXT: .cfi_endproc
+; GISEL-NEXT: .section __TEXT,__gcc_except_tab
+; GISEL-NEXT: .p2align 2, 0x0
+; GISEL-NEXT: GCC_except_table3:
+; GISEL-NEXT: Lexception0:
+; GISEL-NEXT: .byte 255 ; @LPStart Encoding = omit
+; GISEL-NEXT: .byte 255 ; @TType Encoding = omit
+; GISEL-NEXT: .byte 1 ; Call site Encoding = uleb128
+; GISEL-NEXT: .uleb128 Lcst_end0-Lcst_begin0
+; GISEL-NEXT: Lcst_begin0:
+; GISEL-NEXT: .uleb128 Lfunc_begin0-Lfunc_begin0 ; >> Call Site 1 <<
+; GISEL-NEXT: .uleb128 Ltmp0-Lfunc_begin0 ; Call between Lfunc_begin0 and Ltmp0
+; GISEL-NEXT: .byte 0 ; has no landing pad
+; GISEL-NEXT: .byte 0 ; On action: cleanup
+; GISEL-NEXT: .uleb128 Ltmp0-Lfunc_begin0 ; >> Call Site 2 <<
+; GISEL-NEXT: .uleb128 Ltmp1-Ltmp0 ; Call between Ltmp0 and Ltmp1
+; GISEL-NEXT: .uleb128 Ltmp2-Lfunc_begin0 ; jumps to Ltmp2
+; GISEL-NEXT: .byte 0 ; On action: cleanup
+; GISEL-NEXT: .uleb128 Ltmp1-Lfunc_begin0 ; >> Call Site 3 <<
+; GISEL-NEXT: .uleb128 Lfunc_end0-Ltmp1 ; Call between Ltmp1 and Lfunc_end0
+; GISEL-NEXT: .byte 0 ; has no landing pad
+; GISEL-NEXT: .byte 0 ; On action: cleanup
+; GISEL-NEXT: Lcst_end0:
+; GISEL-NEXT: .p2align 2, 0x0
entry:
%call = call ptr @foo1() [ "clang.arc.attachedcall"(ptr @objc_retainAutoreleasedReturnValue) ]
invoke void @objc_object(ptr %call) #5
@@ -87,13 +254,151 @@ lpad: ; preds = %entry
}
define dso_local void @rv_marker_4() personality ptr @__gxx_personality_v0 {
-; CHECK-LABEL: _rv_marker_4:
-; CHECK: Ltmp3:
-; CHECK-NEXT: bl _foo1
-; CHECK-NEXT: mov x29, x29
-; CHECK-NEXT: bl _objc_retainAutoreleasedReturnValue
-; CHECK-NEXT: Ltmp4:
+; SELDAG-LABEL: rv_marker_4:
+; SELDAG: Lfunc_begin1:
+; SELDAG-NEXT: .cfi_startproc
+; SELDAG-NEXT: .cfi_personality 155, ___gxx_personality_v0
+; SELDAG-NEXT: .cfi_lsda 16, Lexception1
+; SELDAG-NEXT: ; %bb.0: ; %entry
+; SELDAG-NEXT: sub sp, sp, #48
+; SELDAG-NEXT: stp x20, x19, [sp, #16] ; 16-byte Folded Spill
+; SELDAG-NEXT: stp x29, x30, [sp, #32] ; 16-byte Folded Spill
+; SELDAG-NEXT: .cfi_def_cfa_offset 48
+; SELDAG-NEXT: .cfi_offset w30, -8
+; SELDAG-NEXT: .cfi_offset w29, -16
+; SELDAG-NEXT: .cfi_offset w19, -24
+; SELDAG-NEXT: .cfi_offset w20, -32
+; SELDAG-NEXT: Ltmp3:
+; SELDAG-NEXT: bl _foo1
+; SELDAG-NEXT: mov x29, x29
+; SELDAG-NEXT: bl _objc_retainAutoreleasedReturnValue
+; SELDAG-NEXT: Ltmp4:
+; SELDAG-NEXT: ; %bb.1: ; %invoke.cont
+; SELDAG-NEXT: Ltmp6:
+; SELDAG-NEXT: mov x19, x0
+; SELDAG-NEXT: bl _objc_object
+; SELDAG-NEXT: Ltmp7:
+; SELDAG-NEXT: ; %bb.2: ; %invoke.cont2
+; SELDAG-NEXT: mov x0, x19
+; SELDAG-NEXT: bl _objc_release
+; SELDAG-NEXT: add x0, sp, #15
+; SELDAG-NEXT: bl __ZN1SD1Ev
+; SELDAG-NEXT: ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
+; SELDAG-NEXT: ldp x20, x19, [sp, #16] ; 16-byte Folded Reload
+; SELDAG-NEXT: add sp, sp, #48
+; SELDAG-NEXT: ret
+; SELDAG-NEXT: LBB4_3: ; %lpad1
+; SELDAG-NEXT: Ltmp8:
+; SELDAG-NEXT: mov x20, x0
+; SELDAG-NEXT: mov x0, x19
+; SELDAG-NEXT: bl _objc_release
+; SELDAG-NEXT: b LBB4_5
+; SELDAG-NEXT: LBB4_4: ; %lpad
+; SELDAG-NEXT: Ltmp5:
+; SELDAG-NEXT: mov x20, x0
+; SELDAG-NEXT: LBB4_5: ; %ehcleanup
+; SELDAG-NEXT: add x0, sp, #15
+; SELDAG-NEXT: bl __ZN1SD1Ev
+; SELDAG-NEXT: mov x0, x20
+; SELDAG-NEXT: bl __Unwind_Resume
+; SELDAG-NEXT: Lfunc_end1:
+; SELDAG-NEXT: .cfi_endproc
+; SELDAG-NEXT: .section __TEXT,__gcc_except_tab
+; SELDAG-NEXT: .p2align 2, 0x0
+; SELDAG-NEXT: GCC_except_table4:
+; SELDAG-NEXT: Lexception1:
+; SELDAG-NEXT: .byte 255 ; @LPStart Encoding = omit
+; SELDAG-NEXT: .byte 255 ; @TType Encoding = omit
+; SELDAG-NEXT: .byte 1 ; Call site Encoding = uleb128
+; SELDAG-NEXT: .uleb128 Lcst_end1-Lcst_begin1
+; SELDAG-NEXT: Lcst_begin1:
+; SELDAG-NEXT: .uleb128 Ltmp3-Lfunc_begin1 ; >> Call Site 1 <<
+; SELDAG-NEXT: .uleb128 Ltmp4-Ltmp3 ; Call between Ltmp3 and Ltmp4
+; SELDAG-NEXT: .uleb128 Ltmp5-Lfunc_begin1 ; jumps to Ltmp5
+; SELDAG-NEXT: .byte 0 ; On action: cleanup
+; SELDAG-NEXT: .uleb128 Ltmp6-Lfunc_begin1 ; >> Call Site 2 <<
+; SELDAG-NEXT: .uleb128 Ltmp7-Ltmp6 ; Call between Ltmp6 and Ltmp7
+; SELDAG-NEXT: .uleb128 Ltmp8-Lfunc_begin1 ; jumps to Ltmp8
+; SELDAG-NEXT: .byte 0 ; On action: cleanup
+; SELDAG-NEXT: .uleb128 Ltmp7-Lfunc_begin1 ; >> Call Site 3 <<
+; SELDAG-NEXT: .uleb128 Lfunc_end1-Ltmp7 ; Call between Ltmp7 and Lfunc_end1
+; SELDAG-NEXT: .byte 0 ; has no landing pad
+; SELDAG-NEXT: .byte 0 ; On action: cleanup
+; SELDAG-NEXT: Lcst_end1:
+; SELDAG-NEXT: .p2align 2, 0x0
;
+; GISEL-LABEL: rv_marker_4:
+; GISEL: Lfunc_begin1:
+; GISEL-NEXT: .cfi_startproc
+; GISEL-NEXT: .cfi_personality 155, ___gxx_personality_v0
+; GISEL-NEXT: .cfi_lsda 16, Lexception1
+; GISEL-NEXT: ; %bb.0: ; %entry
+; GISEL-NEXT: sub sp, sp, #48
+; GISEL-NEXT: stp x20, x19, [sp, #16] ; 16-byte Folded Spill
+; GISEL-NEXT: stp x29, x30, [sp, #32] ; 16-byte Folded Spill
+; GISEL-NEXT: .cfi_def_cfa_offset 48
+; GISEL-NEXT: .cfi_offset w30, -8
+; GISEL-NEXT: .cfi_offset w29, -16
+; GISEL-NEXT: .cfi_offset w19, -24
+; GISEL-NEXT: .cfi_offset w20, -32
+; GISEL-NEXT: Ltmp3:
+; GISEL-NEXT: bl _foo1
+; GISEL-NEXT: mov x29, x29
+; GISEL-NEXT: bl _objc_retainAutoreleasedReturnValue
+; GISEL-NEXT: Ltmp4:
+; GISEL-NEXT: ; %bb.1: ; %invoke.cont
+; GISEL-NEXT: Ltmp6:
+; GISEL-NEXT: mov x19, x0
+; GISEL-NEXT: bl _objc_object
+; GISEL-NEXT: Ltmp7:
+; GISEL-NEXT: ; %bb.2: ; %invoke.cont2
+; GISEL-NEXT: mov x0, x19
+; GISEL-NEXT: bl _objc_release
+; GISEL-NEXT: add x0, sp, #15
+; GISEL-NEXT: bl __ZN1SD1Ev
+; GISEL-NEXT: ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
+; GISEL-NEXT: ldp x20, x19, [sp, #16] ; 16-byte Folded Reload
+; GISEL-NEXT: add sp, sp, #48
+; GISEL-NEXT: ret
+; GISEL-NEXT: LBB4_3: ; %lpad1
+; GISEL-NEXT: Ltmp8:
+; GISEL-NEXT: mov x20, x0
+; GISEL-NEXT: mov x0, x19
+; GISEL-NEXT: bl _objc_release
+; GISEL-NEXT: b LBB4_5
+; GISEL-NEXT: LBB4_4: ; %lpad
+; GISEL-NEXT: Ltmp5:
+; GISEL-NEXT: mov x20, x0
+; GISEL-NEXT: LBB4_5: ; %ehcleanup
+; GISEL-NEXT: add x0, sp, #15
+; GISEL-NEXT: bl __ZN1SD1Ev
+; GISEL-NEXT: mov x0, x20
+; GISEL-NEXT: bl __Unwind_Resume
+; GISEL-NEXT: Lfunc_end1:
+; GISEL-NEXT: .cfi_endproc
+; GISEL-NEXT: .section __TEXT,__gcc_except_tab
+; GISEL-NEXT: .p2align 2, 0x0
+; GISEL-NEXT: GCC_except_table4:
+; GISEL-NEXT: Lexception1:
+; GISEL-NEXT: .byte 255 ; @LPStart Encoding = omit
+; GISEL-NEXT: .byte 255 ; @TType Encoding = omit
+; GISEL-NEXT: .byte 1 ; Call site Encoding = uleb128
+; GISEL-NEXT: .uleb128 Lcst_end1-Lcst_begin1
+; GISEL-NEXT: Lcst_begin1:
+; GISEL-NEXT: .uleb128 Ltmp3-Lfunc_begin1 ; >> Call Site 1 <<
+; GISEL-NEXT: .uleb128 Ltmp4-Ltmp3 ; Call between Ltmp3 and Ltmp4
+; GISEL-NEXT: .uleb128 Ltmp5-Lfunc_begin1 ; jumps to Ltmp5
+; GISEL-NEXT: .byte 0 ; On action: cleanup
+; GISEL-NEXT: .uleb128 Ltmp6-Lfunc_begin1 ; >> Call Site 2 <<
+; GISEL-NEXT: .uleb128 Ltmp7-Ltmp6 ; Call between Ltmp6 and Ltmp7
+; GISEL-NEXT: .uleb128 Ltmp8-Lfunc_begin1 ; jumps to Ltmp8
+; GISEL-NEXT: .byte 0 ; On action: cleanup
+; GISEL-NEXT: .uleb128 Ltmp7-Lfunc_begin1 ; >> Call Site 3 <<
+; GISEL-NEXT: .uleb128 Lfunc_end1-Ltmp7 ; Call between Ltmp7 and Lfunc_end1
+; GISEL-NEXT: .byte 0 ; has no landing pad
+; GISEL-NEXT: .byte 0 ; On action: cleanup
+; GISEL-NEXT: Lcst_end1:
+; GISEL-NEXT: .p2align 2, 0x0
entry:
%s = alloca %struct.S, align 1
call void @llvm.lifetime.start.p0(i64 1, ptr nonnull %s) #2
@@ -129,11 +434,53 @@ ehcleanup: ; preds = %lpad1, %lpad
}
define dso_local ptr @rv_marker_5_indirect_call() {
-; CHECK-LABEL: _rv_marker_5_indirect_call:
-; CHECK: ldr [[ADDR:x[0-9]+]], [
-; CHECK-NEXT: blr [[ADDR]]
-; CHECK-NEXT: mov x29, x29
-; CHECK-NEXT: bl _objc_retainAutoreleasedReturnValue
+; SELDAG-LABEL: rv_marker_5_indirect_call:
+; SELDAG: ; %bb.0: ; %entry
+; SELDAG-NEXT: stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill
+; SELDAG-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; SELDAG-NEXT: .cfi_def_cfa_offset 32
+; SELDAG-NEXT: .cfi_offset w30, -8
+; SELDAG-NEXT: .cfi_offset w29, -16
+; SELDAG-NEXT: .cfi_offset w19, -24
+; SELDAG-NEXT: .cfi_offset w20, -32
+; SELDAG-NEXT: Lloh0:
+; SELDAG-NEXT: adrp x8, _fptr@PAGE
+; SELDAG-NEXT: Lloh1:
+; SELDAG-NEXT: ldr x8, [x8, _fptr@PAGEOFF]
+; SELDAG-NEXT: blr x8
+; SELDAG-NEXT: mov x29, x29
+; SELDAG-NEXT: bl _objc_retainAutoreleasedReturnValue
+; SELDAG-NEXT: mov x19, x0
+; SELDAG-NEXT: bl _foo2
+; SELDAG-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; SELDAG-NEXT: mov x0, x19
+; SELDAG-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
+; SELDAG-NEXT: ret
+; SELDAG-NEXT: .loh AdrpLdr Lloh0, Lloh1
+;
+; GISEL-LABEL: rv_marker_5_indirect_call:
+; GISEL: ; %bb.0: ; %entry
+; GISEL-NEXT: stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill
+; GISEL-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; GISEL-NEXT: .cfi_def_cfa_offset 32
+; GISEL-NEXT: .cfi_offset w30, -8
+; GISEL-NEXT: .cfi_offset w29, -16
+; GISEL-NEXT: .cfi_offset w19, -24
+; GISEL-NEXT: .cfi_offset w20, -32
+; GISEL-NEXT: Lloh0:
+; GISEL-NEXT: adrp x8, _fptr@PAGE
+; GISEL-NEXT: Lloh1:
+; GISEL-NEXT: ldr x8, [x8, _fptr@PAGEOFF]
+; GISEL-NEXT: blr x8
+; GISEL-NEXT: mov x29, x29
+; GISEL-NEXT: bl _objc_retainAutoreleasedReturnValue
+; GISEL-NEXT: mov x19, x0
+; GISEL-NEXT: bl _foo2
+; GISEL-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; GISEL-NEXT: mov x0, x19
+; GISEL-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
+; GISEL-NEXT: ret
+; GISEL-NEXT: .loh AdrpLdr Lloh0, Lloh1
entry:
%0 = load ptr, ptr @fptr, align 8
%call = call ptr %0() [ "clang.arc.attachedcall"(ptr @objc_retainAutoreleasedReturnValue) ]
@@ -144,13 +491,35 @@ entry:
declare ptr @foo(i64, i64, i64)
define dso_local void @rv_marker_multiarg(i64 %a, i64 %b, i64 %c) {
-; CHECK-LABEL: _rv_marker_multiarg:
-; CHECK: mov [[TMP:x[0-9]+]], x0
-; CHECK-NEXT: mov x0, x2
-; CHECK-NEXT: mov x2, [[TMP]]
-; CHECK-NEXT: bl _foo
-; CHECK-NEXT: mov x29, x29
-; CHECK-NEXT: bl _objc_retainAutoreleasedReturnValue
+; SELDAG-LABEL: rv_marker_multiarg:
+; SELDAG: ; %bb.0:
+; SELDAG-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; SELDAG-NEXT: .cfi_def_cfa_offset 16
+; SELDAG-NEXT: .cfi_offset w30, -8
+; SELDAG-NEXT: .cfi_offset w29, -16
+; SELDAG-NEXT: mov x8, x0
+; SELDAG-NEXT: mov x0, x2
+; SELDAG-NEXT: mov x2, x8
+; SELDAG-NEXT: bl _foo
+; SELDAG-NEXT: mov x29, x29
+; SELDAG-NEXT: bl _objc_retainAutoreleasedReturnValue
+; SELDAG-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; SELDAG-NEXT: ret
+;
+; GISEL-LABEL: rv_marker_multiarg:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; GISEL-NEXT: .cfi_def_cfa_offset 16
+; GISEL-NEXT: .cfi_offset w30, -8
+; GISEL-NEXT: .cfi_offset w29, -16
+; GISEL-NEXT: mov x3, x0
+; GISEL-NEXT: mov x0, x2
+; GISEL-NEXT: mov x2, x3
+; GISEL-NEXT: bl _foo
+; GISEL-NEXT: mov x29, x29
+; GISEL-NEXT: bl _objc_retainAutoreleasedReturnValue
+; GISEL-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; GISEL-NEXT: ret
call ptr @foo(i64 %c, i64 %b, i64 %a) [ "clang.arc.attachedcall"(ptr @objc_retainAutoreleasedReturnValue) ]
ret void
}
@@ -158,3 +527,5 @@ define dso_local void @rv_marker_multiarg(i64 %a, i64 %b, i64 %c) {
declare ptr @objc_retainAutoreleasedReturnValue(ptr)
declare ptr @objc_unsafeClaimAutoreleasedReturnValue(ptr)
declare i32 @__gxx_personality_v0(...)
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fold-binop-into-select.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fold-binop-into-select.mir
index 9f3ad8b4444462..96a776f6fbb693 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fold-binop-into-select.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fold-binop-into-select.mir
@@ -450,8 +450,9 @@ body: |
; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0
; CHECK-NEXT: %variable:_(s32) = COPY $vgpr0
; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0
- ; CHECK-NEXT: %cond:_(s1) = G_ICMP intpred(eq), %reg(s32), %zero
- ; CHECK-NEXT: %and:_(s32) = G_SELECT %cond(s1), %zero, %variable
+ ; CHECK-NEXT: %cond:_(s1) = G_ICMP intpred(ne), %reg(s32), %zero
+ ; CHECK-NEXT: %select:_(s32) = G_SEXT %cond(s1)
+ ; CHECK-NEXT: %and:_(s32) = G_AND %select, %variable
; CHECK-NEXT: S_ENDPGM 0, implicit %and(s32)
%reg:_(s32) = COPY $vgpr0
%variable:_(s32) = COPY $vgpr0
@@ -476,7 +477,8 @@ body: |
; CHECK-NEXT: %variable:_(s32) = COPY $vgpr0
; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: %cond:_(s1) = G_ICMP intpred(eq), %reg(s32), %zero
- ; CHECK-NEXT: %and:_(s32) = G_SELECT %cond(s1), %variable, %zero
+ ; CHECK-NEXT: %select:_(s32) = G_SEXT %cond(s1)
+ ; CHECK-NEXT: %and:_(s32) = G_AND %select, %variable
; CHECK-NEXT: S_ENDPGM 0, implicit %and(s32)
%reg:_(s32) = COPY $vgpr0
%variable:_(s32) = COPY $vgpr0
@@ -500,9 +502,9 @@ body: |
; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0
; CHECK-NEXT: %variable:_(s32) = COPY $vgpr0
; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0
- ; CHECK-NEXT: %cond:_(s1) = G_ICMP intpred(eq), %reg(s32), %zero
- ; CHECK-NEXT: %neg1:_(s32) = G_CONSTANT i32 -1
- ; CHECK-NEXT: %or:_(s32) = G_SELECT %cond(s1), %variable, %neg1
+ ; CHECK-NEXT: %cond:_(s1) = G_ICMP intpred(ne), %reg(s32), %zero
+ ; CHECK-NEXT: %select:_(s32) = G_SEXT %cond(s1)
+ ; CHECK-NEXT: %or:_(s32) = G_OR %select, %variable
; CHECK-NEXT: S_ENDPGM 0, implicit %or(s32)
%reg:_(s32) = COPY $vgpr0
%variable:_(s32) = COPY $vgpr0
@@ -527,8 +529,8 @@ body: |
; CHECK-NEXT: %variable:_(s32) = COPY $vgpr0
; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: %cond:_(s1) = G_ICMP intpred(eq), %reg(s32), %zero
- ; CHECK-NEXT: %neg1:_(s32) = G_CONSTANT i32 -1
- ; CHECK-NEXT: %or:_(s32) = G_SELECT %cond(s1), %neg1, %variable
+ ; CHECK-NEXT: %select:_(s32) = G_SEXT %cond(s1)
+ ; CHECK-NEXT: %or:_(s32) = G_OR %select, %variable
; CHECK-NEXT: S_ENDPGM 0, implicit %or(s32)
%reg:_(s32) = COPY $vgpr0
%variable:_(s32) = COPY $vgpr0
@@ -667,9 +669,9 @@ body: |
; CHECK-NEXT: %variable:_(s32) = COPY $vgpr0
; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: %cond:_(s1) = G_ICMP intpred(eq), %reg(s32), %zero
- ; CHECK-NEXT: %neg1:_(s32) = G_CONSTANT i32 -1
; CHECK-NEXT: %otherconst:_(s32) = G_CONSTANT i32 123
- ; CHECK-NEXT: %select:_(s32) = G_SELECT %cond(s1), %neg1, %otherconst
+ ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT %cond(s1)
+ ; CHECK-NEXT: %select:_(s32) = G_OR [[SEXT]], %otherconst
; CHECK-NEXT: %or:_(s32) = G_OR %select, %variable
; CHECK-NEXT: S_ENDPGM 0, implicit %or(s32)
%reg:_(s32) = COPY $vgpr0
@@ -749,8 +751,7 @@ body: |
; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0
; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: %cond:_(s1) = G_ICMP intpred(eq), %reg(s32), %zero
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: %srem:_(s32) = G_SELECT %cond(s1), [[C]], %zero
+ ; CHECK-NEXT: %srem:_(s32) = G_ZEXT %cond(s1)
; CHECK-NEXT: S_ENDPGM 0, implicit %srem(s32)
%reg:_(s32) = COPY $vgpr0
%zero:_(s32) = G_CONSTANT i32 0
@@ -802,8 +803,7 @@ body: |
; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0
; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: %cond:_(s1) = G_ICMP intpred(eq), %reg(s32), %zero
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: %udiv:_(s32) = G_SELECT %cond(s1), [[C]], %zero
+ ; CHECK-NEXT: %udiv:_(s32) = G_ZEXT %cond(s1)
; CHECK-NEXT: S_ENDPGM 0, implicit %udiv(s32)
%reg:_(s32) = COPY $vgpr0
%zero:_(s32) = G_CONSTANT i32 0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll
index ccf4e84fbbbd16..4ac1fad6deecdc 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll
@@ -37,7 +37,8 @@ define amdgpu_ps void @divergent_i1_phi_uniform_branch(ptr addrspace(1) %out, i3
; GFX10-NEXT: v_cmp_gt_u32_e64 s0, 1, v2
; GFX10-NEXT: global_store_dword v[3:4], v5, off
; GFX10-NEXT: .LBB0_3: ; %exit
-; GFX10-NEXT: v_cndmask_b32_e64 v2, 2, 1, s0
+; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, -1, s0
+; GFX10-NEXT: v_add_nc_u32_e32 v2, 2, v2
; GFX10-NEXT: global_store_dword v[0:1], v2, off
; GFX10-NEXT: s_endpgm
A:
@@ -72,7 +73,8 @@ define amdgpu_ps void @divergent_i1_phi_uniform_branch_simple(ptr addrspace(1) %
; GFX10-NEXT: .LBB1_2: ; %B
; GFX10-NEXT: v_cmp_gt_u32_e64 s0, 1, v2
; GFX10-NEXT: .LBB1_3: ; %exit
-; GFX10-NEXT: v_cndmask_b32_e64 v2, 2, 1, s0
+; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, -1, s0
+; GFX10-NEXT: v_add_nc_u32_e32 v2, 2, v2
; GFX10-NEXT: global_store_dword v[0:1], v2, off
; GFX10-NEXT: s_endpgm
A:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.ll
index afd271c9957700..c1f3924e466d57 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.ll
@@ -14,7 +14,8 @@ define amdgpu_ps void @divergent_i1_phi_if_then(ptr addrspace(1) %out, i32 %tid,
; GFX10-NEXT: v_cmp_gt_u32_e64 s0, 1, v2
; GFX10-NEXT: ; %bb.2: ; %exit
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s1
-; GFX10-NEXT: v_cndmask_b32_e64 v2, 2, 1, s0
+; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, -1, s0
+; GFX10-NEXT: v_add_nc_u32_e32 v2, 2, v2
; GFX10-NEXT: global_store_dword v[0:1], v2, off
; GFX10-NEXT: s_endpgm
A:
@@ -51,7 +52,8 @@ define amdgpu_ps void @divergent_i1_phi_if_else(ptr addrspace(1) %out, i32 %tid,
; GFX10-NEXT: v_cmp_le_u32_e64 s0, 1, v2
; GFX10-NEXT: ; %bb.4: ; %exit
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s1
-; GFX10-NEXT: v_cndmask_b32_e64 v2, 2, 1, s0
+; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, -1, s0
+; GFX10-NEXT: v_add_nc_u32_e32 v2, 2, v2
; GFX10-NEXT: global_store_dword v[0:1], v2, off
; GFX10-NEXT: s_endpgm
entry:
diff --git a/llvm/test/CodeGen/AMDGPU/fdiv_flags.f32.ll b/llvm/test/CodeGen/AMDGPU/fdiv_flags.f32.ll
index 794b10eea58b9b..0cd409f726af2f 100644
--- a/llvm/test/CodeGen/AMDGPU/fdiv_flags.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/fdiv_flags.f32.ll
@@ -1517,7 +1517,8 @@ define float @v_recip_sqrt_f32_ulp25(float %x) {
; CODEGEN-IEEE-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CODEGEN-IEEE-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
; CODEGEN-IEEE-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; CODEGEN-IEEE-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 32, vcc
+; CODEGEN-IEEE-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; CODEGEN-IEEE-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
; CODEGEN-IEEE-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
; CODEGEN-IEEE-GISEL-NEXT: v_sqrt_f32_e32 v0, v0
; CODEGEN-IEEE-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -16, vcc
@@ -1558,7 +1559,8 @@ define float @v_recip_sqrt_f32_ulp25(float %x) {
; IR-IEEE-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; IR-IEEE-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
; IR-IEEE-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; IR-IEEE-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 32, vcc
+; IR-IEEE-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; IR-IEEE-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
; IR-IEEE-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
; IR-IEEE-GISEL-NEXT: v_sqrt_f32_e32 v0, v0
; IR-IEEE-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -16, vcc
diff --git a/llvm/test/CodeGen/AMDGPU/fptrunc.ll b/llvm/test/CodeGen/AMDGPU/fptrunc.ll
index 97216b6c94693c..b516660f3bdc69 100644
--- a/llvm/test/CodeGen/AMDGPU/fptrunc.ll
+++ b/llvm/test/CodeGen/AMDGPU/fptrunc.ll
@@ -230,15 +230,16 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(ptr addrspace(1) %out, double %in)
; VI-SAFE-GISEL-NEXT: s_cselect_b32 s2, 1, 0
; VI-SAFE-GISEL-NEXT: s_or_b32 s2, s5, s2
; VI-SAFE-GISEL-NEXT: s_cmp_lg_u32 s2, 0
-; VI-SAFE-GISEL-NEXT: s_movk_i32 s5, 0x7e00
-; VI-SAFE-GISEL-NEXT: s_cselect_b32 s5, s5, 0x7c00
+; VI-SAFE-GISEL-NEXT: s_cselect_b32 s5, 1, 0
; VI-SAFE-GISEL-NEXT: s_sub_i32 s7, 1, s4
; VI-SAFE-GISEL-NEXT: s_lshl_b32 s6, s4, 12
; VI-SAFE-GISEL-NEXT: s_max_i32 s7, s7, 0
; VI-SAFE-GISEL-NEXT: s_or_b32 s6, s2, s6
; VI-SAFE-GISEL-NEXT: s_min_i32 s7, s7, 13
; VI-SAFE-GISEL-NEXT: s_bitset1_b32 s2, 12
+; VI-SAFE-GISEL-NEXT: s_lshl_b32 s5, s5, 9
; VI-SAFE-GISEL-NEXT: s_lshr_b32 s8, s2, s7
+; VI-SAFE-GISEL-NEXT: s_or_b32 s5, s5, 0x7c00
; VI-SAFE-GISEL-NEXT: s_lshl_b32 s7, s8, s7
; VI-SAFE-GISEL-NEXT: s_cmp_lg_u32 s7, s2
; VI-SAFE-GISEL-NEXT: s_cselect_b32 s2, 1, 0
@@ -358,20 +359,21 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(ptr addrspace(1) %out, double %in)
; GFX10-SAFE-GISEL-NEXT: s_cmp_lg_u32 s2, 0
; GFX10-SAFE-GISEL-NEXT: s_cselect_b32 s2, 1, 0
; GFX10-SAFE-GISEL-NEXT: s_or_b32 s2, s5, s2
-; GFX10-SAFE-GISEL-NEXT: s_movk_i32 s5, 0x7e00
; GFX10-SAFE-GISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX10-SAFE-GISEL-NEXT: s_cselect_b32 s5, s5, 0x7c00
+; GFX10-SAFE-GISEL-NEXT: s_cselect_b32 s5, 1, 0
; GFX10-SAFE-GISEL-NEXT: s_sub_i32 s6, 1, s4
-; GFX10-SAFE-GISEL-NEXT: s_or_b32 s7, s2, 0x1000
+; GFX10-SAFE-GISEL-NEXT: s_or_b32 s8, s2, 0x1000
; GFX10-SAFE-GISEL-NEXT: s_max_i32 s6, s6, 0
-; GFX10-SAFE-GISEL-NEXT: s_lshl_b32 s9, s4, 12
+; GFX10-SAFE-GISEL-NEXT: s_lshl_b32 s7, s4, 12
; GFX10-SAFE-GISEL-NEXT: s_min_i32 s6, s6, 13
-; GFX10-SAFE-GISEL-NEXT: s_or_b32 s2, s2, s9
-; GFX10-SAFE-GISEL-NEXT: s_lshr_b32 s8, s7, s6
-; GFX10-SAFE-GISEL-NEXT: s_lshl_b32 s6, s8, s6
-; GFX10-SAFE-GISEL-NEXT: s_cmp_lg_u32 s6, s7
+; GFX10-SAFE-GISEL-NEXT: s_lshl_b32 s5, s5, 9
+; GFX10-SAFE-GISEL-NEXT: s_lshr_b32 s9, s8, s6
+; GFX10-SAFE-GISEL-NEXT: s_or_b32 s2, s2, s7
+; GFX10-SAFE-GISEL-NEXT: s_lshl_b32 s6, s9, s6
+; GFX10-SAFE-GISEL-NEXT: s_or_b32 s5, s5, 0x7c00
+; GFX10-SAFE-GISEL-NEXT: s_cmp_lg_u32 s6, s8
; GFX10-SAFE-GISEL-NEXT: s_cselect_b32 s6, 1, 0
-; GFX10-SAFE-GISEL-NEXT: s_or_b32 s6, s8, s6
+; GFX10-SAFE-GISEL-NEXT: s_or_b32 s6, s9, s6
; GFX10-SAFE-GISEL-NEXT: s_cmp_lt_i32 s4, 1
; GFX10-SAFE-GISEL-NEXT: s_cselect_b32 s2, s6, s2
; GFX10-SAFE-GISEL-NEXT: s_and_b32 s6, s2, 7
@@ -497,24 +499,24 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(ptr addrspace(1) %out, double %in)
; GFX11-SAFE-GISEL-NEXT: s_and_b32 s5, s5, 0xffe
; GFX11-SAFE-GISEL-NEXT: s_cmp_lg_u32 s2, 0
; GFX11-SAFE-GISEL-NEXT: s_cselect_b32 s2, 1, 0
-; GFX11-SAFE-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-SAFE-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-SAFE-GISEL-NEXT: s_or_b32 s2, s5, s2
-; GFX11-SAFE-GISEL-NEXT: s_movk_i32 s5, 0x7e00
; GFX11-SAFE-GISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX11-SAFE-GISEL-NEXT: s_cselect_b32 s5, s5, 0x7c00
+; GFX11-SAFE-GISEL-NEXT: s_cselect_b32 s5, 1, 0
; GFX11-SAFE-GISEL-NEXT: s_sub_i32 s6, 1, s4
-; GFX11-SAFE-GISEL-NEXT: s_or_b32 s7, s2, 0x1000
+; GFX11-SAFE-GISEL-NEXT: s_or_b32 s8, s2, 0x1000
; GFX11-SAFE-GISEL-NEXT: s_max_i32 s6, s6, 0
-; GFX11-SAFE-GISEL-NEXT: s_lshl_b32 s9, s4, 12
+; GFX11-SAFE-GISEL-NEXT: s_lshl_b32 s7, s4, 12
; GFX11-SAFE-GISEL-NEXT: s_min_i32 s6, s6, 13
-; GFX11-SAFE-GISEL-NEXT: s_or_b32 s2, s2, s9
-; GFX11-SAFE-GISEL-NEXT: s_lshr_b32 s8, s7, s6
-; GFX11-SAFE-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX11-SAFE-GISEL-NEXT: s_lshl_b32 s6, s8, s6
-; GFX11-SAFE-GISEL-NEXT: s_cmp_lg_u32 s6, s7
+; GFX11-SAFE-GISEL-NEXT: s_lshl_b32 s5, s5, 9
+; GFX11-SAFE-GISEL-NEXT: s_lshr_b32 s9, s8, s6
+; GFX11-SAFE-GISEL-NEXT: s_or_b32 s2, s2, s7
+; GFX11-SAFE-GISEL-NEXT: s_lshl_b32 s6, s9, s6
+; GFX11-SAFE-GISEL-NEXT: s_or_b32 s5, s5, 0x7c00
+; GFX11-SAFE-GISEL-NEXT: s_cmp_lg_u32 s6, s8
; GFX11-SAFE-GISEL-NEXT: s_cselect_b32 s6, 1, 0
; GFX11-SAFE-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
-; GFX11-SAFE-GISEL-NEXT: s_or_b32 s6, s8, s6
+; GFX11-SAFE-GISEL-NEXT: s_or_b32 s6, s9, s6
; GFX11-SAFE-GISEL-NEXT: s_cmp_lt_i32 s4, 1
; GFX11-SAFE-GISEL-NEXT: s_cselect_b32 s2, s6, s2
; GFX11-SAFE-GISEL-NEXT: s_and_b32 s6, s2, 7
diff --git a/llvm/test/CodeGen/AMDGPU/fsqrt.f32.ll b/llvm/test/CodeGen/AMDGPU/fsqrt.f32.ll
index 046f2624696958..31e481bf7aa4d8 100644
--- a/llvm/test/CodeGen/AMDGPU/fsqrt.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/fsqrt.f32.ll
@@ -1850,7 +1850,8 @@ define float @v_sqrt_f32_ulp2(float %x) {
; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0x800000
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 32, vcc
+; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v1, 5, v1
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, -16, vcc
@@ -1886,7 +1887,8 @@ define float @v_sqrt_f32_ulp25(float %x) {
; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0x800000
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 32, vcc
+; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v1, 5, v1
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, -16, vcc
@@ -1922,7 +1924,8 @@ define float @v_sqrt_f32_ulp3(float %x) {
; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0x800000
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 32, vcc
+; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v1, 5, v1
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, -16, vcc
@@ -1957,7 +1960,8 @@ define float @v_sqrt_f32_ulp2_fabs(float %x) {
; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0x800000
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v1
-; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 32, s[4:5]
+; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5]
+; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v1, 5, v1
; GISEL-IEEE-NEXT: v_ldexp_f32_e64 v0, |v0|, v1
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, -16, s[4:5]
@@ -2090,10 +2094,12 @@ define <2 x float> @v_sqrt_v2f32_ulp2(<2 x float> %x) {
; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x800000
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v3, 0, 32, vcc
+; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], v1, v2
+; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v3, 5, v3
+; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5]
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v3
-; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, 32, s[4:5]
+; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v2, 5, v2
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v1, v1, v2
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v1, v1
@@ -2232,10 +2238,12 @@ define <2 x float> @v_sqrt_v2f32_ulp2_fabs(<2 x float> %x) {
; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x800000
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
-; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v3, 0, 32, s[4:5]
+; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5]
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[6:7], |v1|, v2
+; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v3, 5, v3
+; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[6:7]
; GISEL-IEEE-NEXT: v_ldexp_f32_e64 v0, |v0|, v3
-; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, 32, s[6:7]
+; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v2, 5, v2
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0
; GISEL-IEEE-NEXT: v_ldexp_f32_e64 v1, |v1|, v2
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v1, v1
@@ -2328,7 +2336,8 @@ define float @v_sqrt_f32_ulp2_noncontractable_rcp(float %x) {
; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0x800000
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 32, vcc
+; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v1, 5, v1
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, -16, vcc
@@ -2425,7 +2434,8 @@ define float @v_sqrt_f32_ulp2_noncontractable_fdiv(float %x, float %y) {
; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x800000
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, 32, vcc
+; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v2, 5, v2
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v2
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, -16, vcc
@@ -2509,7 +2519,8 @@ define float @v_sqrt_f32_ulp2_contractable_fdiv(float %x, float %y) {
; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x800000
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, 32, vcc
+; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v2, 5, v2
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v2
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, -16, vcc
@@ -2589,7 +2600,8 @@ define float @v_sqrt_f32_ulp2_contractable_fdiv_arcp(float %x, float %y) {
; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x800000
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, 32, vcc
+; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v2, 5, v2
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v2
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, -16, vcc
@@ -2658,10 +2670,12 @@ define <2 x float> @v_sqrt_v2f32_ulp2_noncontractable_rcp(<2 x float> %x) {
; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x800000
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v3, 0, 32, vcc
+; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], v1, v2
+; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v3, 5, v3
+; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5]
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v3
-; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, 32, s[4:5]
+; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v2, 5, v2
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v1, v1, v2
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v1, v1
@@ -2802,10 +2816,12 @@ define <2 x float> @v_sqrt_v2f32_ulp2_contractable_fdiv(<2 x float> %x, <2 x flo
; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-IEEE-NEXT: v_mov_b32_e32 v4, 0x800000
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v4
-; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v5, 0, 32, vcc
+; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], v1, v4
+; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v5, 5, v5
+; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5]
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v5
-; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v4, 0, 32, s[4:5]
+; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v4, 5, v4
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v1, v1, v4
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v1, v1
@@ -2929,10 +2945,12 @@ define <2 x float> @v_sqrt_v2f32_ulp2_contractable_fdiv_arcp(<2 x float> %x, <2
; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-IEEE-NEXT: v_mov_b32_e32 v4, 0x800000
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v4
-; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v5, 0, 32, vcc
+; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], v1, v4
+; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v5, 5, v5
+; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5]
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v5
-; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v4, 0, 32, s[4:5]
+; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v4, 5, v4
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v1, v1, v4
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v1, v1
@@ -3029,7 +3047,8 @@ define float @v_sqrt_f32_known_never_posdenormal_ulp2(float nofpclass(psub) %x)
; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0x800000
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 32, vcc
+; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v1, 5, v1
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, -16, vcc
@@ -3064,7 +3083,8 @@ define float @v_sqrt_f32_nsz_known_never_posdenormal_ulp2(float nofpclass(psub)
; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0x800000
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 32, vcc
+; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v1, 5, v1
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, -16, vcc
@@ -3099,7 +3119,8 @@ define float @v_sqrt_f32_known_never_negdenormal(float nofpclass(nsub) %x) {
; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0x800000
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 32, vcc
+; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v1, 5, v1
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, -16, vcc
@@ -3698,7 +3719,8 @@ define float @v_sqrt_f32_known_never_zero_never_ninf_ulp2(float nofpclass(zero n
; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0x800000
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 32, vcc
+; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v1, 5, v1
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, -16, vcc
@@ -3733,7 +3755,8 @@ define float @v_sqrt_f32_known_never_ninf_ulp2(float nofpclass(ninf) %x) {
; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0x800000
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 32, vcc
+; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v1, 5, v1
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, -16, vcc
@@ -3768,7 +3791,8 @@ define float @v_sqrt_f32_nsz_known_never_ninf_ulp2(float nofpclass(ninf) %x) {
; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0x800000
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 32, vcc
+; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v1, 5, v1
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, -16, vcc
@@ -3911,7 +3935,8 @@ define float @v_elim_redun_check_ult_sqrt_ulp3(float %in) {
; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0x800000
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 32, vcc
+; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v1, 5, v1
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v1, v0, v1
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v1, v1
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, -16, vcc
diff --git a/llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll b/llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll
index 196a3705ac8187..1a3d00211ca9b5 100644
--- a/llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll
@@ -40,8 +40,8 @@ define double @v_sqrt_f64(double %x) {
; GISEL-NEXT: v_mov_b32_e32 v2, 0
; GISEL-NEXT: v_bfrev_b32_e32 v3, 8
; GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
+; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
@@ -100,8 +100,8 @@ define double @v_sqrt_f64_fneg(double %x) {
; GISEL-NEXT: v_mov_b32_e32 v2, 0
; GISEL-NEXT: v_bfrev_b32_e32 v3, 8
; GISEL-NEXT: v_cmp_lt_f64_e64 vcc, -v[0:1], v[2:3]
-; GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
+; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; GISEL-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v2
; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
@@ -161,8 +161,8 @@ define double @v_sqrt_f64_fabs(double %x) {
; GISEL-NEXT: v_mov_b32_e32 v2, 0
; GISEL-NEXT: v_bfrev_b32_e32 v3, 8
; GISEL-NEXT: v_cmp_lt_f64_e64 vcc, |v[0:1]|, v[2:3]
-; GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
+; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; GISEL-NEXT: v_ldexp_f64 v[0:1], |v[0:1]|, v2
; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
@@ -222,8 +222,8 @@ define double @v_sqrt_f64_fneg_fabs(double %x) {
; GISEL-NEXT: v_mov_b32_e32 v2, 0
; GISEL-NEXT: v_bfrev_b32_e32 v3, 8
; GISEL-NEXT: v_cmp_lt_f64_e64 vcc, -|v[0:1]|, v[2:3]
-; GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
+; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; GISEL-NEXT: v_ldexp_f64 v[0:1], -|v[0:1]|, v2
; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
@@ -284,8 +284,8 @@ define double @v_sqrt_f64_ninf(double %x) {
; GISEL-NEXT: v_mov_b32_e32 v2, 0
; GISEL-NEXT: v_bfrev_b32_e32 v3, 8
; GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
+; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
@@ -344,8 +344,8 @@ define double @v_sqrt_f64_no_infs_attribute(double %x) "no-infs-fp-math"="true"
; GISEL-NEXT: v_mov_b32_e32 v2, 0
; GISEL-NEXT: v_bfrev_b32_e32 v3, 8
; GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
+; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
@@ -404,8 +404,8 @@ define double @v_sqrt_f64_nnan(double %x) {
; GISEL-NEXT: v_mov_b32_e32 v2, 0
; GISEL-NEXT: v_bfrev_b32_e32 v3, 8
; GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
+; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
@@ -464,8 +464,8 @@ define amdgpu_ps <2 x i32> @s_sqrt_f64(double inreg %x) {
; GISEL-NEXT: v_mov_b32_e32 v0, 0
; GISEL-NEXT: v_bfrev_b32_e32 v1, 8
; GISEL-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
-; GISEL-NEXT: v_mov_b32_e32 v2, 0x100
-; GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
+; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GISEL-NEXT: v_lshlrev_b32_e32 v0, 8, v0
; GISEL-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0
; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
@@ -533,8 +533,8 @@ define amdgpu_ps <2 x i32> @s_sqrt_f64_ninf(double inreg %x) {
; GISEL-NEXT: v_mov_b32_e32 v0, 0
; GISEL-NEXT: v_bfrev_b32_e32 v1, 8
; GISEL-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
-; GISEL-NEXT: v_mov_b32_e32 v2, 0x100
-; GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
+; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GISEL-NEXT: v_lshlrev_b32_e32 v0, 8, v0
; GISEL-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0
; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
@@ -602,8 +602,8 @@ define amdgpu_ps <2 x i32> @s_sqrt_f64_afn(double inreg %x) {
; GISEL-NEXT: v_mov_b32_e32 v0, 0
; GISEL-NEXT: v_bfrev_b32_e32 v1, 8
; GISEL-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
-; GISEL-NEXT: v_mov_b32_e32 v2, 0x100
-; GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
+; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GISEL-NEXT: v_lshlrev_b32_e32 v0, 8, v0
; GISEL-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0
; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
@@ -671,8 +671,8 @@ define amdgpu_ps <2 x i32> @s_sqrt_f64_afn_nnan_ninf(double inreg %x) {
; GISEL-NEXT: v_mov_b32_e32 v0, 0
; GISEL-NEXT: v_bfrev_b32_e32 v1, 8
; GISEL-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
-; GISEL-NEXT: v_mov_b32_e32 v2, 0x100
-; GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
+; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GISEL-NEXT: v_lshlrev_b32_e32 v0, 8, v0
; GISEL-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0
; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
@@ -740,8 +740,8 @@ define double @v_sqrt_f64_nsz(double %x) {
; GISEL-NEXT: v_mov_b32_e32 v2, 0
; GISEL-NEXT: v_bfrev_b32_e32 v3, 8
; GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
+; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
@@ -800,8 +800,8 @@ define double @v_sqrt_f64_nnan_ninf(double %x) {
; GISEL-NEXT: v_mov_b32_e32 v2, 0
; GISEL-NEXT: v_bfrev_b32_e32 v3, 8
; GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
+; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
@@ -860,8 +860,8 @@ define double @v_sqrt_f64_nnan_ninf_nsz(double %x) {
; GISEL-NEXT: v_mov_b32_e32 v2, 0
; GISEL-NEXT: v_bfrev_b32_e32 v3, 8
; GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
+; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
@@ -920,8 +920,8 @@ define double @v_sqrt_f64_afn(double %x) {
; GISEL-NEXT: v_mov_b32_e32 v2, 0
; GISEL-NEXT: v_bfrev_b32_e32 v3, 8
; GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
+; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
@@ -980,8 +980,8 @@ define double @v_sqrt_f64_afn_nsz(double %x) {
; GISEL-NEXT: v_mov_b32_e32 v2, 0
; GISEL-NEXT: v_bfrev_b32_e32 v3, 8
; GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
+; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
@@ -1062,10 +1062,11 @@ define <2 x double> @v_sqrt_v2f64_afn(<2 x double> %x) {
; GISEL-NEXT: v_mov_b32_e32 v5, s5
; GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
; GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
-; GISEL-NEXT: v_mov_b32_e32 v6, 0x100
-; GISEL-NEXT: v_cndmask_b32_e32 v7, 0, v6, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v6, s[4:5]
-; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v7
+; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
+; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5]
+; GISEL-NEXT: v_lshlrev_b32_e32 v6, 8, v6
+; GISEL-NEXT: v_lshlrev_b32_e32 v4, 8, v4
+; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v6
; GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[0:1]
; GISEL-NEXT: v_rsq_f64_e32 v[6:7], v[2:3]
@@ -1139,8 +1140,8 @@ define double @v_sqrt_f64_afn_nnan(double %x) {
; GISEL-NEXT: v_mov_b32_e32 v2, 0
; GISEL-NEXT: v_bfrev_b32_e32 v3, 8
; GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
+; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
@@ -1199,8 +1200,8 @@ define double @v_sqrt_f64_fabs_afn_ninf(double %x) {
; GISEL-NEXT: v_mov_b32_e32 v2, 0
; GISEL-NEXT: v_bfrev_b32_e32 v3, 8
; GISEL-NEXT: v_cmp_lt_f64_e64 vcc, |v[0:1]|, v[2:3]
-; GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
+; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; GISEL-NEXT: v_ldexp_f64 v[0:1], |v[0:1]|, v2
; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
@@ -1260,8 +1261,8 @@ define double @v_sqrt_f64_afn_nnan_ninf(double %x) {
; GISEL-NEXT: v_mov_b32_e32 v2, 0
; GISEL-NEXT: v_bfrev_b32_e32 v3, 8
; GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
+; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
@@ -1342,10 +1343,11 @@ define <2 x double> @v_sqrt_v2f64_afn_nnan_ninf(<2 x double> %x) {
; GISEL-NEXT: v_mov_b32_e32 v5, s5
; GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
; GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
-; GISEL-NEXT: v_mov_b32_e32 v6, 0x100
-; GISEL-NEXT: v_cndmask_b32_e32 v7, 0, v6, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v6, s[4:5]
-; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v7
+; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
+; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5]
+; GISEL-NEXT: v_lshlrev_b32_e32 v6, 8, v6
+; GISEL-NEXT: v_lshlrev_b32_e32 v4, 8, v4
+; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v6
; GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[0:1]
; GISEL-NEXT: v_rsq_f64_e32 v[6:7], v[2:3]
@@ -1419,8 +1421,8 @@ define double @v_sqrt_f64_afn_nnan_ninf_nsz(double %x) {
; GISEL-NEXT: v_mov_b32_e32 v2, 0
; GISEL-NEXT: v_bfrev_b32_e32 v3, 8
; GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
+; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
@@ -1479,8 +1481,8 @@ define double @v_sqrt_f64__approx_func_fp_math(double %x) #2 {
; GISEL-NEXT: v_mov_b32_e32 v2, 0
; GISEL-NEXT: v_bfrev_b32_e32 v3, 8
; GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
+; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
@@ -1539,8 +1541,8 @@ define double @v_sqrt_f64__enough_unsafe_attrs(double %x) #3 {
; GISEL-NEXT: v_mov_b32_e32 v2, 0
; GISEL-NEXT: v_bfrev_b32_e32 v3, 8
; GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
+; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
@@ -1599,8 +1601,8 @@ define double @v_sqrt_f64__unsafe_attr(double %x) #4 {
; GISEL-NEXT: v_mov_b32_e32 v2, 0
; GISEL-NEXT: v_bfrev_b32_e32 v3, 8
; GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
+; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
@@ -1681,10 +1683,11 @@ define <2 x double> @v_sqrt_v2f64(<2 x double> %x) {
; GISEL-NEXT: v_mov_b32_e32 v5, s5
; GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
; GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
-; GISEL-NEXT: v_mov_b32_e32 v6, 0x100
-; GISEL-NEXT: v_cndmask_b32_e32 v7, 0, v6, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v6, s[4:5]
-; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v7
+; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
+; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5]
+; GISEL-NEXT: v_lshlrev_b32_e32 v6, 8, v6
+; GISEL-NEXT: v_lshlrev_b32_e32 v4, 8, v4
+; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v6
; GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[0:1]
; GISEL-NEXT: v_rsq_f64_e32 v[6:7], v[2:3]
@@ -1796,16 +1799,18 @@ define <3 x double> @v_sqrt_v3f64(<3 x double> %x) {
; GISEL-NEXT: s_mov_b32 s4, 0
; GISEL-NEXT: s_brev_b32 s5, 8
; GISEL-NEXT: v_mov_b32_e32 v6, s4
-; GISEL-NEXT: v_mov_b32_e32 v7, s5
; GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
+; GISEL-NEXT: v_mov_b32_e32 v7, s5
; GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[6:7]
; GISEL-NEXT: v_cmp_lt_f64_e64 s[6:7], v[4:5], v[6:7]
-; GISEL-NEXT: v_mov_b32_e32 v8, 0x100
-; GISEL-NEXT: v_cndmask_b32_e32 v9, 0, v8, vcc
-; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v9
-; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, v8, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, v8, s[6:7]
-; GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v9
+; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT: v_lshlrev_b32_e32 v8, 8, v8
+; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v8
+; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[6:7]
+; GISEL-NEXT: v_lshlrev_b32_e32 v8, 8, v8
+; GISEL-NEXT: v_lshlrev_b32_e32 v6, 8, v6
+; GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v8
; GISEL-NEXT: v_ldexp_f64 v[4:5], v[4:5], v6
; GISEL-NEXT: v_rsq_f64_e32 v[6:7], v[0:1]
; GISEL-NEXT: v_rsq_f64_e32 v[8:9], v[2:3]
@@ -1824,8 +1829,8 @@ define <3 x double> @v_sqrt_v3f64(<3 x double> %x) {
; GISEL-NEXT: v_fma_f64 v[8:9], v[8:9], v[20:21], v[8:9]
; GISEL-NEXT: v_fma_f64 v[14:15], v[14:15], v[20:21], v[14:15]
; GISEL-NEXT: v_fma_f64 v[10:11], v[10:11], v[22:23], v[10:11]
-; GISEL-NEXT: v_fma_f64 v[16:17], v[16:17], v[22:23], v[16:17]
; GISEL-NEXT: v_fma_f64 v[18:19], -v[6:7], v[6:7], v[0:1]
+; GISEL-NEXT: v_fma_f64 v[16:17], v[16:17], v[22:23], v[16:17]
; GISEL-NEXT: v_fma_f64 v[20:21], -v[8:9], v[8:9], v[2:3]
; GISEL-NEXT: v_fma_f64 v[22:23], -v[10:11], v[10:11], v[4:5]
; GISEL-NEXT: v_fma_f64 v[6:7], v[18:19], v[12:13], v[6:7]
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.inverse.ballot.i64.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.inverse.ballot.i64.ll
index b3912aea55f791..fcc57b8bb7075e 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.inverse.ballot.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.inverse.ballot.i64.ll
@@ -102,9 +102,9 @@ define amdgpu_cs void @vgpr_inverse_ballot(i64 %input, ptr addrspace(1) %out) {
; GISEL: ; %bb.0: ; %entry
; GISEL-NEXT: v_readfirstlane_b32 s0, v0
; GISEL-NEXT: v_readfirstlane_b32 s1, v1
-; GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
-; GISEL-NEXT: global_store_b64 v[2:3], v[0:1], off
+; GISEL-NEXT: v_mov_b32_e32 v5, 0
+; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[0:1]
+; GISEL-NEXT: global_store_b64 v[2:3], v[4:5], off
; GISEL-NEXT: s_nop 0
; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-NEXT: s_endpgm
@@ -164,8 +164,8 @@ define amdgpu_cs void @phi_uniform(i64 inreg %s0_1, i64 inreg %s2, ptr addrspace
; GISEL-NEXT: s_add_u32 s0, s0, 1
; GISEL-NEXT: s_addc_u32 s1, s1, 0
; GISEL-NEXT: .LBB5_2: ; %endif
-; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1]
; GISEL-NEXT: v_mov_b32_e32 v3, 0
+; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1]
; GISEL-NEXT: global_store_b64 v[0:1], v[2:3], off
; GISEL-NEXT: s_nop 0
; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
diff --git a/llvm/test/CodeGen/AMDGPU/rsq.f64.ll b/llvm/test/CodeGen/AMDGPU/rsq.f64.ll
index 3dc565ceed0d0b..3ad98719c689c9 100644
--- a/llvm/test/CodeGen/AMDGPU/rsq.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/rsq.f64.ll
@@ -62,12 +62,12 @@ define amdgpu_ps <2 x i32> @s_rsq_f64(double inreg %x) {
; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0
; SI-GISEL-NEXT: v_bfrev_b32_e32 v1, 8
; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x100
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
-; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0
; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 8, v0
+; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0
; SI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0x3ff00000
; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; SI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
@@ -146,8 +146,8 @@ define amdgpu_ps <2 x i32> @s_rsq_f64(double inreg %x) {
; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0
; VI-GISEL-NEXT: v_bfrev_b32_e32 v1, 8
; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x100
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 8, v0
; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0
; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
@@ -242,12 +242,12 @@ define amdgpu_ps <2 x i32> @s_rsq_f64_fabs(double inreg %x) {
; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0
; SI-GISEL-NEXT: v_bfrev_b32_e32 v1, 8
; SI-GISEL-NEXT: v_cmp_lt_f64_e64 vcc, |s[0:1]|, v[0:1]
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x100
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
-; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], |s[0:1]|, v0
; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 8, v0
+; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], |s[0:1]|, v0
; SI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0x3ff00000
; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; SI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
@@ -326,8 +326,8 @@ define amdgpu_ps <2 x i32> @s_rsq_f64_fabs(double inreg %x) {
; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0
; VI-GISEL-NEXT: v_bfrev_b32_e32 v1, 8
; VI-GISEL-NEXT: v_cmp_lt_f64_e64 vcc, |s[0:1]|, v[0:1]
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x100
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 8, v0
; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], |s[0:1]|, v0
; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
@@ -423,12 +423,12 @@ define amdgpu_ps <2 x i32> @s_neg_rsq_f64(double inreg %x) {
; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0
; SI-GISEL-NEXT: v_bfrev_b32_e32 v1, 8
; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x100
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
-; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0
; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 8, v0
+; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0
; SI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0xbff00000
; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; SI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
@@ -507,8 +507,8 @@ define amdgpu_ps <2 x i32> @s_neg_rsq_f64(double inreg %x) {
; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0
; VI-GISEL-NEXT: v_bfrev_b32_e32 v1, 8
; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x100
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 8, v0
; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0
; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
@@ -603,12 +603,12 @@ define amdgpu_ps <2 x i32> @s_neg_rsq_neg_f64(double inreg %x) {
; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0
; SI-GISEL-NEXT: v_bfrev_b32_e32 v1, 8
; SI-GISEL-NEXT: v_cmp_lt_f64_e64 vcc, -s[0:1], v[0:1]
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x100
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
-; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], -s[0:1], v0
; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 8, v0
+; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], -s[0:1], v0
; SI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0xbff00000
; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; SI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
@@ -687,8 +687,8 @@ define amdgpu_ps <2 x i32> @s_neg_rsq_neg_f64(double inreg %x) {
; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0
; VI-GISEL-NEXT: v_bfrev_b32_e32 v1, 8
; VI-GISEL-NEXT: v_cmp_lt_f64_e64 vcc, -s[0:1], v[0:1]
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x100
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 8, v0
; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], -s[0:1], v0
; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
@@ -784,12 +784,12 @@ define double @v_rsq_f64(double %x) {
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0
; SI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
-; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
+; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; SI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0x3ff00000
; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; SI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
@@ -866,8 +866,8 @@ define double @v_rsq_f64(double %x) {
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0
; VI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
@@ -953,12 +953,12 @@ define double @v_rsq_f64_fabs(double %x) {
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0
; SI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
; SI-GISEL-NEXT: v_cmp_lt_f64_e64 vcc, |v[0:1]|, v[2:3]
-; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
-; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], |v[0:1]|, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
+; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], |v[0:1]|, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0x3ff00000
; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; SI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
@@ -1035,8 +1035,8 @@ define double @v_rsq_f64_fabs(double %x) {
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0
; VI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
; VI-GISEL-NEXT: v_cmp_lt_f64_e64 vcc, |v[0:1]|, v[2:3]
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], |v[0:1]|, v2
; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
@@ -1123,12 +1123,12 @@ define double @v_rsq_f64_missing_contract0(double %x) {
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0
; SI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
-; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
+; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; SI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0x3ff00000
; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; SI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
@@ -1205,8 +1205,8 @@ define double @v_rsq_f64_missing_contract0(double %x) {
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0
; VI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
@@ -1292,12 +1292,12 @@ define double @v_rsq_f64_missing_contract1(double %x) {
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0
; SI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
-; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
+; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; SI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0x3ff00000
; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; SI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
@@ -1374,8 +1374,8 @@ define double @v_rsq_f64_missing_contract1(double %x) {
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0
; VI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
@@ -1461,12 +1461,12 @@ define double @v_neg_rsq_f64(double %x) {
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0
; SI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
-; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
+; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; SI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0xbff00000
; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; SI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
@@ -1543,8 +1543,8 @@ define double @v_neg_rsq_f64(double %x) {
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0
; VI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
@@ -1664,26 +1664,27 @@ define <2 x double> @v_rsq_v2f64(<2 x double> %x) {
; SI-GISEL-NEXT: s_mov_b32 s4, 0
; SI-GISEL-NEXT: s_brev_b32 s5, 8
; SI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-GISEL-NEXT: v_mov_b32_e32 v12, 0x100
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v12, vcc
-; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4
; SI-GISEL-NEXT: v_mov_b32_e32 v10, s4
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[0:1]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v4, 8, v4
+; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4
; SI-GISEL-NEXT: v_mov_b32_e32 v11, s5
+; SI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[0:1]
; SI-GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[10:11]
; SI-GISEL-NEXT: v_mov_b32_e32 v14, 0xffffff80
+; SI-GISEL-NEXT: v_mov_b32_e32 v15, 0x260
; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[4:5], 0.5
; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[0:1], v[4:5]
-; SI-GISEL-NEXT: v_mov_b32_e32 v15, 0x260
-; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[6:7], v[4:5], 0.5
; SI-GISEL-NEXT: v_mov_b32_e32 v20, 0x3ff00000
+; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[6:7], v[4:5], 0.5
; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[4:5], v[4:5], v[0:1]
; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[8:9], v[6:7], v[4:5]
; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[4:5], v[4:5], v[0:1]
; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[8:9], v[6:7], v[4:5]
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, v12, s[4:5]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5]
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 8, v6
; SI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v6
; SI-GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v14, vcc
; SI-GISEL-NEXT: v_rsq_f64_e32 v[6:7], v[2:3]
@@ -1815,10 +1816,11 @@ define <2 x double> @v_rsq_v2f64(<2 x double> %x) {
; VI-GISEL-NEXT: v_mov_b32_e32 v5, s5
; VI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
; VI-GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
-; VI-GISEL-NEXT: v_mov_b32_e32 v6, 0x100
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v7, 0, v6, vcc
-; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v7
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v6, s[4:5]
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 8, v6
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5]
+; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v6
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v4, 8, v4
; VI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; VI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[0:1]
; VI-GISEL-NEXT: v_rsq_f64_e32 v[6:7], v[2:3]
@@ -1965,26 +1967,27 @@ define <2 x double> @v_neg_rsq_v2f64(<2 x double> %x) {
; SI-GISEL-NEXT: s_mov_b32 s4, 0
; SI-GISEL-NEXT: s_brev_b32 s5, 8
; SI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-GISEL-NEXT: v_mov_b32_e32 v12, 0x100
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v12, vcc
-; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4
; SI-GISEL-NEXT: v_mov_b32_e32 v10, s4
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[0:1]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v4, 8, v4
+; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4
; SI-GISEL-NEXT: v_mov_b32_e32 v11, s5
+; SI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[0:1]
; SI-GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[10:11]
; SI-GISEL-NEXT: v_mov_b32_e32 v14, 0xffffff80
+; SI-GISEL-NEXT: v_mov_b32_e32 v15, 0x260
; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[4:5], 0.5
; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[0:1], v[4:5]
-; SI-GISEL-NEXT: v_mov_b32_e32 v15, 0x260
-; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[6:7], v[4:5], 0.5
; SI-GISEL-NEXT: v_mov_b32_e32 v20, 0xbff00000
+; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[6:7], v[4:5], 0.5
; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[4:5], v[4:5], v[0:1]
; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[8:9], v[6:7], v[4:5]
; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[4:5], v[4:5], v[0:1]
; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[8:9], v[6:7], v[4:5]
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, v12, s[4:5]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5]
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 8, v6
; SI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v6
; SI-GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v14, vcc
; SI-GISEL-NEXT: v_rsq_f64_e32 v[6:7], v[2:3]
@@ -2116,10 +2119,11 @@ define <2 x double> @v_neg_rsq_v2f64(<2 x double> %x) {
; VI-GISEL-NEXT: v_mov_b32_e32 v5, s5
; VI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
; VI-GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
-; VI-GISEL-NEXT: v_mov_b32_e32 v6, 0x100
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v7, 0, v6, vcc
-; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v7
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v6, s[4:5]
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 8, v6
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5]
+; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v6
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v4, 8, v4
; VI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; VI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[0:1]
; VI-GISEL-NEXT: v_rsq_f64_e32 v[6:7], v[2:3]
@@ -2234,17 +2238,17 @@ define <2 x double> @v_neg_rsq_v2f64_poisonelt(<2 x double> %x) {
; SI-GISEL-NEXT: s_mov_b32 s4, 0
; SI-GISEL-NEXT: s_brev_b32 s5, 8
; SI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-GISEL-NEXT: v_mov_b32_e32 v12, 0x100
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v12, vcc
-; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4
; SI-GISEL-NEXT: v_mov_b32_e32 v10, s4
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[0:1]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v4, 8, v4
+; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4
; SI-GISEL-NEXT: v_mov_b32_e32 v11, s5
+; SI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[0:1]
; SI-GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[10:11]
; SI-GISEL-NEXT: v_mov_b32_e32 v14, 0xffffff80
+; SI-GISEL-NEXT: v_mov_b32_e32 v15, 0x260
; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[4:5], 0.5
; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[0:1], v[4:5]
-; SI-GISEL-NEXT: v_mov_b32_e32 v15, 0x260
; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[6:7], v[4:5], 0.5
; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
@@ -2252,7 +2256,8 @@ define <2 x double> @v_neg_rsq_v2f64_poisonelt(<2 x double> %x) {
; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[8:9], v[6:7], v[4:5]
; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[4:5], v[4:5], v[0:1]
; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[8:9], v[6:7], v[4:5]
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, v12, s[4:5]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5]
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 8, v6
; SI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v6
; SI-GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v14, vcc
; SI-GISEL-NEXT: v_rsq_f64_e32 v[6:7], v[2:3]
@@ -2356,10 +2361,11 @@ define <2 x double> @v_neg_rsq_v2f64_poisonelt(<2 x double> %x) {
; VI-GISEL-NEXT: v_mov_b32_e32 v5, s5
; VI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
; VI-GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
-; VI-GISEL-NEXT: v_mov_b32_e32 v6, 0x100
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v7, 0, v6, vcc
-; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v7
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v6, s[4:5]
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 8, v6
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5]
+; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v6
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v4, 8, v4
; VI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; VI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[0:1]
; VI-GISEL-NEXT: v_rsq_f64_e32 v[6:7], v[2:3]
@@ -2507,17 +2513,17 @@ define <2 x double> @v_neg_pos_rsq_v2f64(<2 x double> %x) {
; SI-GISEL-NEXT: s_mov_b32 s4, 0
; SI-GISEL-NEXT: s_brev_b32 s5, 8
; SI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-GISEL-NEXT: v_mov_b32_e32 v12, 0x100
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v12, vcc
-; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4
; SI-GISEL-NEXT: v_mov_b32_e32 v10, s4
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[0:1]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v4, 8, v4
+; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4
; SI-GISEL-NEXT: v_mov_b32_e32 v11, s5
+; SI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[0:1]
; SI-GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[10:11]
; SI-GISEL-NEXT: v_mov_b32_e32 v14, 0xffffff80
+; SI-GISEL-NEXT: v_mov_b32_e32 v15, 0x260
; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[4:5], 0.5
; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[0:1], v[4:5]
-; SI-GISEL-NEXT: v_mov_b32_e32 v15, 0x260
; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[6:7], v[4:5], 0.5
; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
@@ -2525,7 +2531,8 @@ define <2 x double> @v_neg_pos_rsq_v2f64(<2 x double> %x) {
; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[8:9], v[6:7], v[4:5]
; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[4:5], v[4:5], v[0:1]
; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[8:9], v[6:7], v[4:5]
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, v12, s[4:5]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5]
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 8, v6
; SI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v6
; SI-GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v14, vcc
; SI-GISEL-NEXT: v_rsq_f64_e32 v[6:7], v[2:3]
@@ -2659,10 +2666,11 @@ define <2 x double> @v_neg_pos_rsq_v2f64(<2 x double> %x) {
; VI-GISEL-NEXT: v_mov_b32_e32 v5, s5
; VI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
; VI-GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
-; VI-GISEL-NEXT: v_mov_b32_e32 v6, 0x100
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v7, 0, v6, vcc
-; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v7
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v6, s[4:5]
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 8, v6
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5]
+; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v6
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v4, 8, v4
; VI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; VI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[0:1]
; VI-GISEL-NEXT: v_rsq_f64_e32 v[6:7], v[2:3]
@@ -2775,12 +2783,12 @@ define double @v_rsq_f64_fneg_fabs(double %x) {
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0
; SI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
; SI-GISEL-NEXT: v_cmp_lt_f64_e64 vcc, -|v[0:1]|, v[2:3]
-; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
-; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], -|v[0:1]|, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
+; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], -|v[0:1]|, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0x3ff00000
; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; SI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
@@ -2857,8 +2865,8 @@ define double @v_rsq_f64_fneg_fabs(double %x) {
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0
; VI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
; VI-GISEL-NEXT: v_cmp_lt_f64_e64 vcc, -|v[0:1]|, v[2:3]
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], -|v[0:1]|, v2
; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
@@ -2946,12 +2954,12 @@ define double @v_rsq_f64__afn_sqrt(double %x) {
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0
; SI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
-; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
+; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; SI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0x3ff00000
; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; SI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
@@ -3028,8 +3036,8 @@ define double @v_rsq_f64__afn_sqrt(double %x) {
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0
; VI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
@@ -3107,12 +3115,12 @@ define double @v_rsq_f64__afn_fdiv(double %x) {
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0
; SI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
-; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
+; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; SI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; SI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
@@ -3177,8 +3185,8 @@ define double @v_rsq_f64__afn_fdiv(double %x) {
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0
; VI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
@@ -3252,12 +3260,12 @@ define double @v_rsq_f64__afn(double %x) {
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0
; SI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
-; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
+; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; SI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; SI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
@@ -3322,8 +3330,8 @@ define double @v_rsq_f64__afn(double %x) {
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0
; VI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
@@ -3398,12 +3406,12 @@ define double @v_neg_rsq_f64__afn(double %x) {
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0
; SI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
-; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
+; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; SI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; SI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
@@ -3470,8 +3478,8 @@ define double @v_neg_rsq_f64__afn(double %x) {
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0
; VI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
@@ -3546,12 +3554,12 @@ define double @v_rsq_f64__afn_ninf(double %x) {
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0
; SI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
-; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
+; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; SI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; SI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
@@ -3616,8 +3624,8 @@ define double @v_rsq_f64__afn_ninf(double %x) {
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0
; VI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
@@ -3691,12 +3699,12 @@ define double @v_rsq_f64__afn_nnan(double %x) {
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0
; SI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
-; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
+; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; SI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; SI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
@@ -3761,8 +3769,8 @@ define double @v_rsq_f64__afn_nnan(double %x) {
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0
; VI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
@@ -3836,12 +3844,12 @@ define double @v_rsq_f64__afn_nnan_ninf(double %x) {
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0
; SI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
-; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
+; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; SI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; SI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
@@ -3906,8 +3914,8 @@ define double @v_rsq_f64__afn_nnan_ninf(double %x) {
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0
; VI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
@@ -3982,12 +3990,12 @@ define double @v_neg_rsq_f64__afn_nnan_ninf(double %x) {
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0
; SI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
-; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
+; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; SI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; SI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
@@ -4054,8 +4062,8 @@ define double @v_neg_rsq_f64__afn_nnan_ninf(double %x) {
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0
; VI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
@@ -4138,12 +4146,12 @@ define double @v_rsq_f64__nnan_ninf(double %x) {
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0
; SI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
-; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
+; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; SI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0x3ff00000
; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; SI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
@@ -4220,8 +4228,8 @@ define double @v_rsq_f64__nnan_ninf(double %x) {
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0
; VI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
@@ -4325,13 +4333,15 @@ define <2 x double> @v_rsq_v2f64__afn_nnan_ninf(<2 x double> %x) {
; SI-GISEL-NEXT: s_mov_b32 s4, 0
; SI-GISEL-NEXT: s_brev_b32 s5, 8
; SI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-GISEL-NEXT: v_mov_b32_e32 v12, 0x100
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v12, vcc
-; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4
; SI-GISEL-NEXT: v_mov_b32_e32 v10, s4
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[0:1]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v4, 8, v4
+; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4
; SI-GISEL-NEXT: v_mov_b32_e32 v11, s5
+; SI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[0:1]
; SI-GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[10:11]
+; SI-GISEL-NEXT: v_mov_b32_e32 v12, 0xffffff80
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v13, 0, v12, vcc
; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[4:5], 0.5
; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[0:1], v[4:5]
; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[6:7], v[4:5], 0.5
@@ -4339,30 +4349,29 @@ define <2 x double> @v_rsq_v2f64__afn_nnan_ninf(<2 x double> %x) {
; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[4:5], v[4:5], v[0:1]
; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[8:9], v[6:7], v[4:5]
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v8, 0, v12, s[4:5]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[4:5]
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v8, 8, v8
; SI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v8
; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[4:5], v[4:5], v[0:1]
; SI-GISEL-NEXT: v_rsq_f64_e32 v[10:11], v[2:3]
; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[8:9], v[6:7], v[4:5]
-; SI-GISEL-NEXT: v_mov_b32_e32 v12, 0xffffff80
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v13, 0, v12, vcc
+; SI-GISEL-NEXT: v_ldexp_f64 v[4:5], v[4:5], v13
; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[10:11], 0.5
; SI-GISEL-NEXT: v_mul_f64 v[8:9], v[2:3], v[10:11]
-; SI-GISEL-NEXT: v_ldexp_f64 v[4:5], v[4:5], v13
-; SI-GISEL-NEXT: v_fma_f64 v[10:11], -v[6:7], v[8:9], 0.5
; SI-GISEL-NEXT: v_mov_b32_e32 v13, 0x260
+; SI-GISEL-NEXT: v_fma_f64 v[10:11], -v[6:7], v[8:9], 0.5
+; SI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v13
; SI-GISEL-NEXT: v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9]
; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
; SI-GISEL-NEXT: v_fma_f64 v[10:11], -v[8:9], v[8:9], v[2:3]
-; SI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v13
-; SI-GISEL-NEXT: v_fma_f64 v[8:9], v[10:11], v[6:7], v[8:9]
; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; SI-GISEL-NEXT: v_fma_f64 v[10:11], -v[8:9], v[8:9], v[2:3]
+; SI-GISEL-NEXT: v_fma_f64 v[8:9], v[10:11], v[6:7], v[8:9]
; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
+; SI-GISEL-NEXT: v_fma_f64 v[10:11], -v[8:9], v[8:9], v[2:3]
+; SI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[2:3], v13
; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[10:11], v[6:7], v[8:9]
; SI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, v12, s[4:5]
; SI-GISEL-NEXT: v_ldexp_f64 v[4:5], v[4:5], v6
-; SI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[2:3], v13
; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[0:1]
@@ -4451,10 +4460,11 @@ define <2 x double> @v_rsq_v2f64__afn_nnan_ninf(<2 x double> %x) {
; VI-GISEL-NEXT: v_mov_b32_e32 v5, s5
; VI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
; VI-GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
-; VI-GISEL-NEXT: v_mov_b32_e32 v6, 0x100
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v7, 0, v6, vcc
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v6, s[4:5]
-; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v7
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5]
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 8, v6
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v4, 8, v4
+; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v6
; VI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; VI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[0:1]
; VI-GISEL-NEXT: v_rsq_f64_e32 v[6:7], v[2:3]
@@ -4550,12 +4560,12 @@ define amdgpu_ps <2 x i32> @s_rsq_f64_unsafe(double inreg %x) #0 {
; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0
; SI-GISEL-NEXT: v_bfrev_b32_e32 v1, 8
; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x100
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
-; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0
; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 8, v0
+; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0
; SI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; SI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
@@ -4622,8 +4632,8 @@ define amdgpu_ps <2 x i32> @s_rsq_f64_unsafe(double inreg %x) #0 {
; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0
; VI-GISEL-NEXT: v_bfrev_b32_e32 v1, 8
; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x100
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 8, v0
; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0
; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
@@ -4706,12 +4716,12 @@ define double @v_rsq_f64_unsafe(double %x) #0 {
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0
; SI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
-; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
+; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; SI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; SI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
@@ -4776,8 +4786,8 @@ define double @v_rsq_f64_unsafe(double %x) #0 {
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0
; VI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
@@ -5112,12 +5122,12 @@ define double @v_div_contract_sqrt_f64(double %x, double %y) {
; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0
; SI-GISEL-NEXT: v_bfrev_b32_e32 v5, 8
; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[4:5]
-; SI-GISEL-NEXT: v_mov_b32_e32 v6, 0x100
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v6, vcc
-; SI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0xffffff80
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[2:3]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v4, 8, v4
+; SI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; SI-GISEL-NEXT: v_mov_b32_e32 v11, 0x260
+; SI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[2:3]
; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[4:5], 0.5
; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], v[4:5]
; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[6:7], v[4:5], 0.5
@@ -5193,8 +5203,8 @@ define double @v_div_contract_sqrt_f64(double %x, double %y) {
; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0
; VI-GISEL-NEXT: v_bfrev_b32_e32 v5, 8
; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[4:5]
-; VI-GISEL-NEXT: v_mov_b32_e32 v6, 0x100
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v6, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v4, 8, v4
; VI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; VI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[2:3]
; VI-GISEL-NEXT: v_mul_f64 v[6:7], v[4:5], 0.5
@@ -5279,12 +5289,12 @@ define double @v_div_arcp_sqrt_f64(double %x, double %y) {
; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0
; SI-GISEL-NEXT: v_bfrev_b32_e32 v5, 8
; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[4:5]
-; SI-GISEL-NEXT: v_mov_b32_e32 v6, 0x100
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v6, vcc
-; SI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0xffffff80
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[2:3]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v4, 8, v4
+; SI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; SI-GISEL-NEXT: v_mov_b32_e32 v11, 0x260
+; SI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[2:3]
; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[4:5], 0.5
; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], v[4:5]
; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[6:7], v[4:5], 0.5
@@ -5360,8 +5370,8 @@ define double @v_div_arcp_sqrt_f64(double %x, double %y) {
; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0
; VI-GISEL-NEXT: v_bfrev_b32_e32 v5, 8
; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[4:5]
-; VI-GISEL-NEXT: v_mov_b32_e32 v6, 0x100
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v6, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v4, 8, v4
; VI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; VI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[2:3]
; VI-GISEL-NEXT: v_mul_f64 v[6:7], v[4:5], 0.5
@@ -5446,12 +5456,12 @@ define double @v_div_contract_arcp_sqrt_f64(double %x, double %y) {
; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0
; SI-GISEL-NEXT: v_bfrev_b32_e32 v5, 8
; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[4:5]
-; SI-GISEL-NEXT: v_mov_b32_e32 v6, 0x100
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v6, vcc
-; SI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0xffffff80
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[2:3]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v4, 8, v4
+; SI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; SI-GISEL-NEXT: v_mov_b32_e32 v11, 0x260
+; SI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[2:3]
; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[4:5], 0.5
; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], v[4:5]
; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[6:7], v[4:5], 0.5
@@ -5527,8 +5537,8 @@ define double @v_div_contract_arcp_sqrt_f64(double %x, double %y) {
; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0
; VI-GISEL-NEXT: v_bfrev_b32_e32 v5, 8
; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[4:5]
-; VI-GISEL-NEXT: v_mov_b32_e32 v6, 0x100
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v6, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v4, 8, v4
; VI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; VI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[2:3]
; VI-GISEL-NEXT: v_mul_f64 v[6:7], v[4:5], 0.5
@@ -5616,17 +5626,17 @@ define double @v_div_const_contract_sqrt_f64(double %x) {
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0
; SI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
-; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
+; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; SI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; SI-GISEL-NEXT: s_mov_b32 s6, 0
; SI-GISEL-NEXT: s_mov_b32 s7, 0x40700000
+; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0x40700000
; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; SI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
-; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0x40700000
; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -5702,10 +5712,10 @@ define double @v_div_const_contract_sqrt_f64(double %x) {
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0
; VI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
; VI-GISEL-NEXT: s_mov_b32 s4, 0
; VI-GISEL-NEXT: s_mov_b32 s5, 0x40700000
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
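
The recurring pattern in the GISEL hunks above: where the old output materialized the constant 0x100 in a register and selected it (v_mov_b32 + v_cndmask), the output after the select-of-constants combine selects an inline 0/1 and shifts it left by 8, since 0x100 == 1 << 8, dropping the v_mov. A minimal standalone C++ sketch of that equivalence (illustrative only; selectForm/shiftForm are hypothetical names, not code from this patch):

// Sketch of the rewrite the new assembly reflects:
// select(cond, 2^k, 0)  ==>  zext(cond) << k,
// trading a constant materialization for a shift.
#include <cassert>
#include <cstdint>

// Before: the selected constant 0x100 lives in a register
// (v_mov_b32 vN, 0x100; v_cndmask ..., 0, vN, cc).
uint32_t selectForm(bool cond) { return cond ? 0x100u : 0u; }

// After: select an inline 0/1 (v_cndmask ..., 0, 1, cc) and
// shift it into place (v_lshlrev_b32 ..., 8, ...).
uint32_t shiftForm(bool cond) { return static_cast<uint32_t>(cond) << 8; }

int main() {
  for (bool c : {false, true})
    assert(selectForm(c) == shiftForm(c));
  return 0;
}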