[llvm] 0d493ed - Revert 4 last AMDGPU commits to unbreak Windows bots
Mehdi Amini via llvm-commits
llvm-commits at lists.llvm.org
Sat May 4 10:58:10 PDT 2024
Author: Mehdi Amini
Date: 2024-05-04T19:57:33+02:00
New Revision: 0d493ed2c6e664849a979b357a606dcd8273b03f
URL: https://github.com/llvm/llvm-project/commit/0d493ed2c6e664849a979b357a606dcd8273b03f
DIFF: https://github.com/llvm/llvm-project/commit/0d493ed2c6e664849a979b357a606dcd8273b03f.diff
LOG: Revert 4 last AMDGPU commits to unbreak Windows bots
Revert "AMDGPU: Try to fix build error with old gcc"
This reverts commit c7ad12d0d7606b0b9fb531b0b273bdc5f1490ddb.
Revert "AMDGPU: Use umin in set.rounding expansion"
This reverts commit a56f0b51dd988ad2b533de759c98457c1ed42456.
Revert "AMDGPU: Optimize set_rounding if input is known to fit in 2 bits (#88588)"
This reverts commit b4e751e2ab0ff152ed18dea59ebf9691e963e1dd.
Revert "AMDGPU: Implement llvm.set.rounding (#88587)"
This reverts commit 9731b77e80261c627d79980f8c275700bdaf6591.
Added:
Modified:
llvm/docs/AMDGPUUsage.rst
llvm/docs/LangRef.rst
llvm/docs/ReleaseNotes.rst
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/lib/Target/AMDGPU/SIISelLowering.h
llvm/lib/Target/AMDGPU/SIModeRegisterDefaults.cpp
llvm/lib/Target/AMDGPU/SIModeRegisterDefaults.h
Removed:
llvm/test/CodeGen/AMDGPU/llvm.set.rounding.ll
################################################################################
diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index 51969be85648f1..029db00134c09d 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -1157,12 +1157,6 @@ The AMDGPU backend implements the following LLVM IR intrinsics.
register do not exactly match the FLT_ROUNDS values,
so a conversion is performed.
- :ref:`llvm.set.rounding<int_set_rounding>` Input value expected to be one of the valid results
- from '``llvm.get.rounding``'. Rounding mode is
- undefined if not passed a valid input. This should be
- a wave uniform value. In case of a divergent input
- value, the first active lane's value will be used.
-
:ref:`llvm.get.fpenv<int_get_fpenv>` Returns the current value of the AMDGPU floating point environment.
This stores information related to the current rounding mode,
denormalization mode, enabled traps, and floating point exceptions.
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 2077fdd841fcd6..6291a4e57919a5 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -26739,8 +26739,6 @@ specified by C standard:
Other values may be used to represent additional rounding modes, supported by a
target. These values are target-specific.
-.. _int_set_rounding:
-
'``llvm.set.rounding``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst
index 0f4e2759de08ac..59c0d4dd2376dd 100644
--- a/llvm/docs/ReleaseNotes.rst
+++ b/llvm/docs/ReleaseNotes.rst
@@ -81,8 +81,6 @@ Changes to the AMDGPU Backend
* Implemented the ``llvm.get.fpenv`` and ``llvm.set.fpenv`` intrinsics.
-* Implemented :ref:`llvm.get.rounding <int_get_rounding>` and :ref:`llvm.set.rounding <int_set_rounding>`
-
Changes to the ARM Backend
--------------------------
* FEAT_F32MM is no longer activated by default when using `+sve` on v8.6-A or greater. The feature is still available and can be used by adding `+f32mm` to the command line options.
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index ed41c10b50d323..cb4efdc7cf657c 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -877,7 +877,6 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::STACKSAVE, MVT::Other, Custom);
setOperationAction(ISD::GET_ROUNDING, MVT::i32, Custom);
- setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
setOperationAction(ISD::GET_FPENV, MVT::i64, Custom);
setOperationAction(ISD::SET_FPENV, MVT::i64, Custom);
@@ -4060,91 +4059,6 @@ SDValue SITargetLowering::lowerGET_ROUNDING(SDValue Op,
return DAG.getMergeValues({Result, GetReg.getValue(1)}, SL);
}
-SDValue SITargetLowering::lowerSET_ROUNDING(SDValue Op,
- SelectionDAG &DAG) const {
- SDLoc SL(Op);
-
- SDValue NewMode = Op.getOperand(1);
- assert(NewMode.getValueType() == MVT::i32);
-
- // Index a table of 4-bit entries mapping from the C FLT_ROUNDS values to the
- // hardware MODE.fp_round values.
- if (auto *ConstMode = dyn_cast<ConstantSDNode>(NewMode)) {
- uint32_t ClampedVal = std::min(
- static_cast<uint32_t>(ConstMode->getZExtValue()),
- static_cast<uint32_t>(AMDGPU::TowardZeroF32_TowardNegativeF64));
- NewMode = DAG.getConstant(
- AMDGPU::decodeFltRoundToHWConversionTable(ClampedVal), SL, MVT::i32);
- } else {
- // If we know the input can only be one of the supported standard modes in
- // the range 0-3, we can use a simplified mapping to hardware values.
- KnownBits KB = DAG.computeKnownBits(NewMode);
- const bool UseReducedTable = KB.countMinLeadingZeros() >= 30;
- // The supported standard values are 0-3. The extended values start at 8. We
- // need to offset by 4 if the value is in the extended range.
-
- if (UseReducedTable) {
- // Truncate to the low 32-bits.
- SDValue BitTable = DAG.getConstant(
- AMDGPU::FltRoundToHWConversionTable & 0xffff, SL, MVT::i32);
-
- SDValue Two = DAG.getConstant(2, SL, MVT::i32);
- SDValue RoundModeTimesNumBits =
- DAG.getNode(ISD::SHL, SL, MVT::i32, NewMode, Two);
-
- NewMode =
- DAG.getNode(ISD::SRL, SL, MVT::i32, BitTable, RoundModeTimesNumBits);
-
- // TODO: SimplifyDemandedBits on the setreg source here can likely reduce
- // the table extracted bits into inline immediates.
- } else {
- // table_index = umin(value, value - 4)
- // MODE.fp_round = (bit_table >> (table_index << 2)) & 0xf
- SDValue BitTable =
- DAG.getConstant(AMDGPU::FltRoundToHWConversionTable, SL, MVT::i64);
-
- SDValue Four = DAG.getConstant(4, SL, MVT::i32);
- SDValue OffsetEnum = DAG.getNode(ISD::SUB, SL, MVT::i32, NewMode, Four);
- SDValue IndexVal =
- DAG.getNode(ISD::UMIN, SL, MVT::i32, NewMode, OffsetEnum);
-
- SDValue Two = DAG.getConstant(2, SL, MVT::i32);
- SDValue RoundModeTimesNumBits =
- DAG.getNode(ISD::SHL, SL, MVT::i32, IndexVal, Two);
-
- SDValue TableValue =
- DAG.getNode(ISD::SRL, SL, MVT::i64, BitTable, RoundModeTimesNumBits);
- SDValue TruncTable = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, TableValue);
-
- // No need to mask out the high bits since the setreg will ignore them
- // anyway.
- NewMode = TruncTable;
- }
-
- // Insert a readfirstlane in case the value is a VGPR. We could do this
- // earlier and keep more operations scalar, but that interferes with
- // combining the source.
- SDValue ReadFirstLaneID =
- DAG.getTargetConstant(Intrinsic::amdgcn_readfirstlane, SL, MVT::i32);
- NewMode = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SL, MVT::i32,
- ReadFirstLaneID, NewMode);
- }
-
- // N.B. The setreg will be later folded into s_round_mode on supported
- // targets.
- SDValue IntrinID =
- DAG.getTargetConstant(Intrinsic::amdgcn_s_setreg, SL, MVT::i32);
- uint32_t BothRoundHwReg =
- AMDGPU::Hwreg::HwregEncoding::encode(AMDGPU::Hwreg::ID_MODE, 0, 4);
- SDValue RoundBothImm = DAG.getTargetConstant(BothRoundHwReg, SL, MVT::i32);
-
- SDValue SetReg =
- DAG.getNode(ISD::INTRINSIC_VOID, SL, Op->getVTList(), Op.getOperand(0),
- IntrinID, RoundBothImm, NewMode);
-
- return SetReg;
-}
-
SDValue SITargetLowering::lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const {
if (Op->isDivergent())
return SDValue();
@@ -5840,8 +5754,6 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
return LowerSTACKSAVE(Op, DAG);
case ISD::GET_ROUNDING:
return lowerGET_ROUNDING(Op, DAG);
- case ISD::SET_ROUNDING:
- return lowerSET_ROUNDING(Op, DAG);
case ISD::PREFETCH:
return lowerPREFETCH(Op, DAG);
case ISD::FP_EXTEND:
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 08aa2a5991631d..9856a2923d38f7 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -422,7 +422,6 @@ class SITargetLowering final : public AMDGPUTargetLowering {
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSTACKSAVE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
- SDValue lowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/AMDGPU/SIModeRegisterDefaults.cpp b/llvm/lib/Target/AMDGPU/SIModeRegisterDefaults.cpp
index 72bffc8400fa65..2684a1e3c3358a 100644
--- a/llvm/lib/Target/AMDGPU/SIModeRegisterDefaults.cpp
+++ b/llvm/lib/Target/AMDGPU/SIModeRegisterDefaults.cpp
@@ -174,122 +174,3 @@ static_assert(decodeIndexFltRoundConversionTable(getModeRegisterRoundMode(
static_assert(decodeIndexFltRoundConversionTable(getModeRegisterRoundMode(
HWTowardNegative, HWTowardPositive)) ==
TowardNegativeF32_TowardPositiveF64);
-
-// Decode FLT_ROUNDS into the hardware value where the two rounding modes are
-// the same and use a standard value
-static constexpr uint64_t encodeFltRoundsToHWTableSame(uint32_t HWVal,
- uint32_t FltRoundsVal) {
- if (FltRoundsVal > TowardNegative)
- FltRoundsVal -= ExtendedFltRoundOffset;
-
- return static_cast<uint64_t>(getModeRegisterRoundMode(HWVal, HWVal))
- << (FltRoundsVal << 2);
-}
-
-/// Decode FLT_ROUNDS into the hardware value where the two rounding modes
-/// different and use an extended value.
-static constexpr uint64_t encodeFltRoundsToHWTable(uint32_t HWF32Val,
- uint32_t HWF64Val,
- uint32_t FltRoundsVal) {
- if (FltRoundsVal > TowardNegative)
- FltRoundsVal -= ExtendedFltRoundOffset;
- return static_cast<uint64_t>(getModeRegisterRoundMode(HWF32Val, HWF64Val))
- << (FltRoundsVal << 2);
-}
-
-constexpr uint64_t AMDGPU::FltRoundToHWConversionTable =
- encodeFltRoundsToHWTableSame(HWTowardZero, TowardZeroF32_TowardZeroF64) |
- encodeFltRoundsToHWTableSame(HWNearestTiesToEven,
- NearestTiesToEvenF32_NearestTiesToEvenF64) |
- encodeFltRoundsToHWTableSame(HWTowardPositive,
- TowardPositiveF32_TowardPositiveF64) |
- encodeFltRoundsToHWTableSame(HWTowardNegative,
- TowardNegativeF32_TowardNegativeF64) |
-
- encodeFltRoundsToHWTable(HWTowardZero, HWNearestTiesToEven,
- TowardZeroF32_NearestTiesToEvenF64) |
- encodeFltRoundsToHWTable(HWTowardZero, HWTowardPositive,
- TowardZeroF32_TowardPositiveF64) |
- encodeFltRoundsToHWTable(HWTowardZero, HWTowardNegative,
- TowardZeroF32_TowardNegativeF64) |
-
- encodeFltRoundsToHWTable(HWNearestTiesToEven, HWTowardZero,
- NearestTiesToEvenF32_TowardZeroF64) |
- encodeFltRoundsToHWTable(HWNearestTiesToEven, HWTowardPositive,
- NearestTiesToEvenF32_TowardPositiveF64) |
- encodeFltRoundsToHWTable(HWNearestTiesToEven, HWTowardNegative,
- NearestTiesToEvenF32_TowardNegativeF64) |
-
- encodeFltRoundsToHWTable(HWTowardPositive, HWTowardZero,
- TowardPositiveF32_TowardZeroF64) |
- encodeFltRoundsToHWTable(HWTowardPositive, HWNearestTiesToEven,
- TowardPositiveF32_NearestTiesToEvenF64) |
- encodeFltRoundsToHWTable(HWTowardPositive, HWTowardNegative,
- TowardPositiveF32_TowardNegativeF64) |
-
- encodeFltRoundsToHWTable(HWTowardNegative, HWTowardZero,
- TowardNegativeF32_TowardZeroF64) |
- encodeFltRoundsToHWTable(HWTowardNegative, HWNearestTiesToEven,
- TowardNegativeF32_NearestTiesToEvenF64) |
- encodeFltRoundsToHWTable(HWTowardNegative, HWTowardPositive,
- TowardNegativeF32_TowardPositiveF64);
-
-/// Read the hardware rounding mode equivalent of a AMDGPUFltRounds value.
-static constexpr uint32_t
-decodeFltRoundToHWConversionTable(uint64_t FltRoundToHWConversionTable,
- uint32_t FltRounds) {
- uint32_t IndexVal = FltRounds;
- if (IndexVal > TowardNegative)
- IndexVal -= ExtendedFltRoundOffset;
- return (FltRoundToHWConversionTable >> (IndexVal << 2)) & 0xf;
-}
-
-uint32_t AMDGPU::decodeFltRoundToHWConversionTable(uint32_t FltRounds) {
- return ::decodeFltRoundToHWConversionTable(FltRoundToHWConversionTable,
- FltRounds);
-}
-
-static constexpr uint32_t decodeFltRoundToHW(uint32_t FltRounds) {
- return ::decodeFltRoundToHWConversionTable(FltRoundToHWConversionTable,
- FltRounds);
-}
-
-// Verify evaluation of FltRoundToHWConversionTable
-
-static_assert(decodeFltRoundToHW(AMDGPUFltRounds::TowardZero) ==
- getModeRegisterRoundMode(HWTowardZero, HWTowardZero));
-static_assert(decodeFltRoundToHW(AMDGPUFltRounds::NearestTiesToEven) ==
- getModeRegisterRoundMode(HWNearestTiesToEven,
- HWNearestTiesToEven));
-static_assert(decodeFltRoundToHW(AMDGPUFltRounds::TowardPositive) ==
- getModeRegisterRoundMode(HWTowardPositive, HWTowardPositive));
-static_assert(decodeFltRoundToHW(AMDGPUFltRounds::TowardNegative) ==
- getModeRegisterRoundMode(HWTowardNegative, HWTowardNegative));
-
-static_assert(decodeFltRoundToHW(NearestTiesToEvenF32_TowardPositiveF64) ==
- getModeRegisterRoundMode(HWNearestTiesToEven, HWTowardPositive));
-static_assert(decodeFltRoundToHW(NearestTiesToEvenF32_TowardNegativeF64) ==
- getModeRegisterRoundMode(HWNearestTiesToEven, HWTowardNegative));
-static_assert(decodeFltRoundToHW(NearestTiesToEvenF32_TowardZeroF64) ==
- getModeRegisterRoundMode(HWNearestTiesToEven, HWTowardZero));
-
-static_assert(decodeFltRoundToHW(TowardPositiveF32_NearestTiesToEvenF64) ==
- getModeRegisterRoundMode(HWTowardPositive, HWNearestTiesToEven));
-static_assert(decodeFltRoundToHW(TowardPositiveF32_TowardNegativeF64) ==
- getModeRegisterRoundMode(HWTowardPositive, HWTowardNegative));
-static_assert(decodeFltRoundToHW(TowardPositiveF32_TowardZeroF64) ==
- getModeRegisterRoundMode(HWTowardPositive, HWTowardZero));
-
-static_assert(decodeFltRoundToHW(TowardNegativeF32_NearestTiesToEvenF64) ==
- getModeRegisterRoundMode(HWTowardNegative, HWNearestTiesToEven));
-static_assert(decodeFltRoundToHW(TowardNegativeF32_TowardPositiveF64) ==
- getModeRegisterRoundMode(HWTowardNegative, HWTowardPositive));
-static_assert(decodeFltRoundToHW(TowardNegativeF32_TowardZeroF64) ==
- getModeRegisterRoundMode(HWTowardNegative, HWTowardZero));
-
-static_assert(decodeFltRoundToHW(TowardZeroF32_NearestTiesToEvenF64) ==
- getModeRegisterRoundMode(HWTowardZero, HWNearestTiesToEven));
-static_assert(decodeFltRoundToHW(TowardZeroF32_TowardPositiveF64) ==
- getModeRegisterRoundMode(HWTowardZero, HWTowardPositive));
-static_assert(decodeFltRoundToHW(TowardZeroF32_TowardNegativeF64) ==
- getModeRegisterRoundMode(HWTowardZero, HWTowardNegative));
diff --git a/llvm/lib/Target/AMDGPU/SIModeRegisterDefaults.h b/llvm/lib/Target/AMDGPU/SIModeRegisterDefaults.h
index c86678a7325356..9fbd74c3eede32 100644
--- a/llvm/lib/Target/AMDGPU/SIModeRegisterDefaults.h
+++ b/llvm/lib/Target/AMDGPU/SIModeRegisterDefaults.h
@@ -144,13 +144,6 @@ static constexpr uint32_t F64FltRoundOffset = 2;
// values.
extern const uint64_t FltRoundConversionTable;
-// Bit indexed table to convert from FLT_ROUNDS values to hardware rounding mode
-// values
-extern const uint64_t FltRoundToHWConversionTable;
-
-/// Read the hardware rounding mode equivalent of a AMDGPUFltRounds value.
-uint32_t decodeFltRoundToHWConversionTable(uint32_t FltRounds);
-
} // end namespace AMDGPU
} // end namespace llvm
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.set.rounding.ll b/llvm/test/CodeGen/AMDGPU/llvm.set.rounding.ll
deleted file mode 100644
index 48abc49c41ae0a..00000000000000
--- a/llvm/test/CodeGen/AMDGPU/llvm.set.rounding.ll
+++ /dev/null
@@ -1,1665 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
-; RUN: llc -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX678,GFX6 %s
-; RUN: llc -march=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefixes=GCN,GFX678,GFX7 %s
-; RUN: llc -march=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX678,GFX8 %s
-; RUN: llc -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
-; RUN: llc -march=amdgcn -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX10 %s
-; RUN: llc -march=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX11 %s
-
-declare void @llvm.set.rounding(i32)
-declare i32 @llvm.get.rounding()
-
-define amdgpu_gfx void @s_set_rounding(i32 inreg %rounding) {
-; GFX678-LABEL: s_set_rounding:
-; GFX678: ; %bb.0:
-; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX678-NEXT: s_add_i32 s34, s4, -4
-; GFX678-NEXT: s_min_u32 s34, s4, s34
-; GFX678-NEXT: s_lshl_b32 s36, s34, 2
-; GFX678-NEXT: s_mov_b32 s34, 0x1c84a50f
-; GFX678-NEXT: s_mov_b32 s35, 0xb73e62d9
-; GFX678-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
-; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
-; GFX678-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: s_set_rounding:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_add_i32 s34, s4, -4
-; GFX9-NEXT: s_min_u32 s34, s4, s34
-; GFX9-NEXT: s_lshl_b32 s36, s34, 2
-; GFX9-NEXT: s_mov_b32 s34, 0x1c84a50f
-; GFX9-NEXT: s_mov_b32 s35, 0xb73e62d9
-; GFX9-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
-; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: s_set_rounding:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_add_i32 s34, s4, -4
-; GFX10-NEXT: s_min_u32 s34, s4, s34
-; GFX10-NEXT: s_lshl_b32 s36, s34, 2
-; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f
-; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9
-; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
-; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: s_set_rounding:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_add_i32 s0, s4, -4
-; GFX11-NEXT: s_min_u32 s0, s4, s0
-; GFX11-NEXT: s_lshl_b32 s2, s0, 2
-; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
-; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
-; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
-; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
-; GFX11-NEXT: s_setpc_b64 s[30:31]
- call void @llvm.set.rounding(i32 %rounding)
- ret void
-}
-
-define amdgpu_kernel void @s_set_rounding_kernel(i32 inreg %rounding) {
-; GFX6-LABEL: s_set_rounding_kernel:
-; GFX6: ; %bb.0:
-; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT: s_mov_b32 s0, 0x1c84a50f
-; GFX6-NEXT: s_mov_b32 s1, 0xb73e62d9
-; GFX6-NEXT: ;;#ASMSTART
-; GFX6-NEXT: ;;#ASMEND
-; GFX6-NEXT: s_waitcnt lgkmcnt(0)
-; GFX6-NEXT: s_add_i32 s3, s2, -4
-; GFX6-NEXT: s_min_u32 s2, s2, s3
-; GFX6-NEXT: s_lshl_b32 s2, s2, 2
-; GFX6-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
-; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
-; GFX6-NEXT: s_endpgm
-;
-; GFX7-LABEL: s_set_rounding_kernel:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_load_dword s2, s[0:1], 0x9
-; GFX7-NEXT: s_mov_b32 s0, 0x1c84a50f
-; GFX7-NEXT: s_mov_b32 s1, 0xb73e62d9
-; GFX7-NEXT: ;;#ASMSTART
-; GFX7-NEXT: ;;#ASMEND
-; GFX7-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7-NEXT: s_add_i32 s3, s2, -4
-; GFX7-NEXT: s_min_u32 s2, s2, s3
-; GFX7-NEXT: s_lshl_b32 s2, s2, 2
-; GFX7-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
-; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
-; GFX7-NEXT: s_endpgm
-;
-; GFX8-LABEL: s_set_rounding_kernel:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_load_dword s2, s[0:1], 0x24
-; GFX8-NEXT: s_mov_b32 s0, 0x1c84a50f
-; GFX8-NEXT: s_mov_b32 s1, 0xb73e62d9
-; GFX8-NEXT: ;;#ASMSTART
-; GFX8-NEXT: ;;#ASMEND
-; GFX8-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8-NEXT: s_add_i32 s3, s2, -4
-; GFX8-NEXT: s_min_u32 s2, s2, s3
-; GFX8-NEXT: s_lshl_b32 s2, s2, 2
-; GFX8-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
-; GFX8-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
-; GFX8-NEXT: s_endpgm
-;
-; GFX9-LABEL: s_set_rounding_kernel:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_load_dword s2, s[0:1], 0x24
-; GFX9-NEXT: s_mov_b32 s0, 0x1c84a50f
-; GFX9-NEXT: s_mov_b32 s1, 0xb73e62d9
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_add_i32 s3, s2, -4
-; GFX9-NEXT: s_min_u32 s2, s2, s3
-; GFX9-NEXT: s_lshl_b32 s2, s2, 2
-; GFX9-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
-; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
-; GFX9-NEXT: s_endpgm
-;
-; GFX10-LABEL: s_set_rounding_kernel:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_load_dword s0, s[0:1], 0x24
-; GFX10-NEXT: ;;#ASMSTART
-; GFX10-NEXT: ;;#ASMEND
-; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: s_add_i32 s1, s0, -4
-; GFX10-NEXT: s_min_u32 s2, s0, s1
-; GFX10-NEXT: s_mov_b32 s0, 0x1c84a50f
-; GFX10-NEXT: s_mov_b32 s1, 0xb73e62d9
-; GFX10-NEXT: s_lshl_b32 s2, s2, 2
-; GFX10-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
-; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
-; GFX10-NEXT: s_endpgm
-;
-; GFX11-LABEL: s_set_rounding_kernel:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_load_b32 s0, s[0:1], 0x24
-; GFX11-NEXT: ;;#ASMSTART
-; GFX11-NEXT: ;;#ASMEND
-; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: s_add_i32 s1, s0, -4
-; GFX11-NEXT: s_min_u32 s2, s0, s1
-; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
-; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
-; GFX11-NEXT: s_lshl_b32 s2, s2, 2
-; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
-; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
-; GFX11-NEXT: s_endpgm
- call void @llvm.set.rounding(i32 %rounding)
- call void asm sideeffect "",""()
- ret void
-}
-
-define void @v_set_rounding(i32 %rounding) {
-; GFX6-LABEL: v_set_rounding:
-; GFX6: ; %bb.0:
-; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT: v_add_i32_e32 v1, vcc, -4, v0
-; GFX6-NEXT: v_min_u32_e32 v0, v0, v1
-; GFX6-NEXT: s_mov_b32 s4, 0x1c84a50f
-; GFX6-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX6-NEXT: s_mov_b32 s5, 0xb73e62d9
-; GFX6-NEXT: v_lshr_b64 v[0:1], s[4:5], v0
-; GFX6-NEXT: v_readfirstlane_b32 s4, v0
-; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4
-; GFX6-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX7-LABEL: v_set_rounding:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_add_i32_e32 v1, vcc, -4, v0
-; GFX7-NEXT: v_min_u32_e32 v0, v0, v1
-; GFX7-NEXT: s_mov_b32 s4, 0x1c84a50f
-; GFX7-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX7-NEXT: s_mov_b32 s5, 0xb73e62d9
-; GFX7-NEXT: v_lshr_b64 v[0:1], s[4:5], v0
-; GFX7-NEXT: v_readfirstlane_b32 s4, v0
-; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_set_rounding:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_add_u32_e32 v1, vcc, -4, v0
-; GFX8-NEXT: v_min_u32_e32 v0, v0, v1
-; GFX8-NEXT: s_mov_b32 s4, 0x1c84a50f
-; GFX8-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX8-NEXT: s_mov_b32 s5, 0xb73e62d9
-; GFX8-NEXT: v_lshrrev_b64 v[0:1], v0, s[4:5]
-; GFX8-NEXT: v_readfirstlane_b32 s4, v0
-; GFX8-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: v_set_rounding:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_add_u32_e32 v1, -4, v0
-; GFX9-NEXT: v_min_u32_e32 v0, v0, v1
-; GFX9-NEXT: s_mov_b32 s4, 0x1c84a50f
-; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT: s_mov_b32 s5, 0xb73e62d9
-; GFX9-NEXT: v_lshrrev_b64 v[0:1], v0, s[4:5]
-; GFX9-NEXT: v_readfirstlane_b32 s4, v0
-; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_set_rounding:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_add_nc_u32_e32 v1, -4, v0
-; GFX10-NEXT: s_mov_b32 s4, 0x1c84a50f
-; GFX10-NEXT: s_mov_b32 s5, 0xb73e62d9
-; GFX10-NEXT: v_min_u32_e32 v0, v0, v1
-; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-NEXT: v_lshrrev_b64 v[0:1], v0, s[4:5]
-; GFX10-NEXT: v_readfirstlane_b32 s4, v0
-; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: v_set_rounding:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_add_nc_u32_e32 v1, -4, v0
-; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
-; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
-; GFX11-NEXT: v_min_u32_e32 v0, v0, v1
-; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-NEXT: v_lshrrev_b64 v[0:1], v0, s[0:1]
-; GFX11-NEXT: v_readfirstlane_b32 s0, v0
-; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
-; GFX11-NEXT: s_setpc_b64 s[30:31]
- call void @llvm.set.rounding(i32 %rounding)
- ret void
-}
-
-define void @set_rounding_get_rounding() {
-; GFX678-LABEL: set_rounding_get_rounding:
-; GFX678: ; %bb.0:
-; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 4)
-; GFX678-NEXT: s_lshl_b32 s6, s4, 2
-; GFX678-NEXT: s_mov_b32 s4, 0xeb24da71
-; GFX678-NEXT: s_mov_b32 s5, 0xc96f385
-; GFX678-NEXT: s_lshr_b64 s[4:5], s[4:5], s6
-; GFX678-NEXT: s_and_b32 s4, s4, 15
-; GFX678-NEXT: s_add_i32 s5, s4, 4
-; GFX678-NEXT: s_cmp_lt_u32 s4, 4
-; GFX678-NEXT: s_cselect_b32 s4, s4, s5
-; GFX678-NEXT: s_add_i32 s5, s4, -4
-; GFX678-NEXT: s_min_u32 s4, s4, s5
-; GFX678-NEXT: s_lshl_b32 s6, s4, 2
-; GFX678-NEXT: s_mov_b32 s4, 0x1c84a50f
-; GFX678-NEXT: s_mov_b32 s5, 0xb73e62d9
-; GFX678-NEXT: s_lshr_b64 s[4:5], s[4:5], s6
-; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4
-; GFX678-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: set_rounding_get_rounding:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 4)
-; GFX9-NEXT: s_lshl_b32 s6, s4, 2
-; GFX9-NEXT: s_mov_b32 s4, 0xeb24da71
-; GFX9-NEXT: s_mov_b32 s5, 0xc96f385
-; GFX9-NEXT: s_lshr_b64 s[4:5], s[4:5], s6
-; GFX9-NEXT: s_and_b32 s4, s4, 15
-; GFX9-NEXT: s_add_i32 s5, s4, 4
-; GFX9-NEXT: s_cmp_lt_u32 s4, 4
-; GFX9-NEXT: s_cselect_b32 s4, s4, s5
-; GFX9-NEXT: s_add_i32 s5, s4, -4
-; GFX9-NEXT: s_min_u32 s4, s4, s5
-; GFX9-NEXT: s_lshl_b32 s6, s4, 2
-; GFX9-NEXT: s_mov_b32 s4, 0x1c84a50f
-; GFX9-NEXT: s_mov_b32 s5, 0xb73e62d9
-; GFX9-NEXT: s_lshr_b64 s[4:5], s[4:5], s6
-; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: set_rounding_get_rounding:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_getreg_b32 s6, hwreg(HW_REG_MODE, 0, 4)
-; GFX10-NEXT: s_mov_b32 s4, 0xeb24da71
-; GFX10-NEXT: s_mov_b32 s5, 0xc96f385
-; GFX10-NEXT: s_lshl_b32 s6, s6, 2
-; GFX10-NEXT: s_lshr_b64 s[4:5], s[4:5], s6
-; GFX10-NEXT: s_and_b32 s4, s4, 15
-; GFX10-NEXT: s_add_i32 s5, s4, 4
-; GFX10-NEXT: s_cmp_lt_u32 s4, 4
-; GFX10-NEXT: s_cselect_b32 s4, s4, s5
-; GFX10-NEXT: s_add_i32 s5, s4, -4
-; GFX10-NEXT: s_min_u32 s4, s4, s5
-; GFX10-NEXT: s_lshl_b32 s6, s4, 2
-; GFX10-NEXT: s_mov_b32 s4, 0x1c84a50f
-; GFX10-NEXT: s_mov_b32 s5, 0xb73e62d9
-; GFX10-NEXT: s_lshr_b64 s[4:5], s[4:5], s6
-; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: set_rounding_get_rounding:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 4)
-; GFX11-NEXT: s_mov_b32 s0, 0xeb24da71
-; GFX11-NEXT: s_mov_b32 s1, 0xc96f385
-; GFX11-NEXT: s_lshl_b32 s2, s2, 2
-; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
-; GFX11-NEXT: s_and_b32 s0, s0, 15
-; GFX11-NEXT: s_add_i32 s1, s0, 4
-; GFX11-NEXT: s_cmp_lt_u32 s0, 4
-; GFX11-NEXT: s_cselect_b32 s0, s0, s1
-; GFX11-NEXT: s_add_i32 s1, s0, -4
-; GFX11-NEXT: s_min_u32 s0, s0, s1
-; GFX11-NEXT: s_lshl_b32 s2, s0, 2
-; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
-; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
-; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
-; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
-; GFX11-NEXT: s_setpc_b64 s[30:31]
- %rounding = call i32 @llvm.get.rounding()
- call void @llvm.set.rounding(i32 %rounding)
- ret void
-}
-
-define void @s_set_rounding_0() {
-; GFX678-LABEL: s_set_rounding_0:
-; GFX678: ; %bb.0:
-; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 15
-; GFX678-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: s_set_rounding_0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 15
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1011-LABEL: s_set_rounding_0:
-; GFX1011: ; %bb.0:
-; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT: s_round_mode 0xf
-; GFX1011-NEXT: s_setpc_b64 s[30:31]
- call void @llvm.set.rounding(i32 0)
- ret void
-}
-
-define void @s_set_rounding_1() {
-; GFX678-LABEL: s_set_rounding_1:
-; GFX678: ; %bb.0:
-; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0
-; GFX678-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: s_set_rounding_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1011-LABEL: s_set_rounding_1:
-; GFX1011: ; %bb.0:
-; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT: s_round_mode 0x0
-; GFX1011-NEXT: s_setpc_b64 s[30:31]
- call void @llvm.set.rounding(i32 1)
- ret void
-}
-
-define void @s_set_rounding_2() {
-; GFX678-LABEL: s_set_rounding_2:
-; GFX678: ; %bb.0:
-; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 5
-; GFX678-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: s_set_rounding_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 5
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1011-LABEL: s_set_rounding_2:
-; GFX1011: ; %bb.0:
-; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT: s_round_mode 0x5
-; GFX1011-NEXT: s_setpc_b64 s[30:31]
- call void @llvm.set.rounding(i32 2)
- ret void
-}
-
-define void @s_set_rounding_3() {
-; GFX678-LABEL: s_set_rounding_3:
-; GFX678: ; %bb.0:
-; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 10
-; GFX678-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: s_set_rounding_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 10
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1011-LABEL: s_set_rounding_3:
-; GFX1011: ; %bb.0:
-; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT: s_round_mode 0xa
-; GFX1011-NEXT: s_setpc_b64 s[30:31]
- call void @llvm.set.rounding(i32 3)
- ret void
-}
-
-; Unsupported mode.
-define void @s_set_rounding_4() {
-; GFX678-LABEL: s_set_rounding_4:
-; GFX678: ; %bb.0:
-; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 15
-; GFX678-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: s_set_rounding_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 15
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1011-LABEL: s_set_rounding_4:
-; GFX1011: ; %bb.0:
-; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT: s_round_mode 0xf
-; GFX1011-NEXT: s_setpc_b64 s[30:31]
- call void @llvm.set.rounding(i32 4)
- ret void
-}
-
-; undefined
-define void @s_set_rounding_5() {
-; GFX678-LABEL: s_set_rounding_5:
-; GFX678: ; %bb.0:
-; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0
-; GFX678-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: s_set_rounding_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1011-LABEL: s_set_rounding_5:
-; GFX1011: ; %bb.0:
-; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT: s_round_mode 0x0
-; GFX1011-NEXT: s_setpc_b64 s[30:31]
- call void @llvm.set.rounding(i32 5)
- ret void
-}
-
-; undefined
-define void @s_set_rounding_6() {
-; GFX678-LABEL: s_set_rounding_6:
-; GFX678: ; %bb.0:
-; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 5
-; GFX678-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: s_set_rounding_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 5
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1011-LABEL: s_set_rounding_6:
-; GFX1011: ; %bb.0:
-; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT: s_round_mode 0x5
-; GFX1011-NEXT: s_setpc_b64 s[30:31]
- call void @llvm.set.rounding(i32 6)
- ret void
-}
-
-; "Dynamic"
-define void @s_set_rounding_7() {
-; GFX678-LABEL: s_set_rounding_7:
-; GFX678: ; %bb.0:
-; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 10
-; GFX678-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: s_set_rounding_7:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 10
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1011-LABEL: s_set_rounding_7:
-; GFX1011: ; %bb.0:
-; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT: s_round_mode 0xa
-; GFX1011-NEXT: s_setpc_b64 s[30:31]
- call void @llvm.set.rounding(i32 7)
- ret void
-}
-
-; Invalid
-define void @s_set_rounding_neg1() {
-; GFX678-LABEL: s_set_rounding_neg1:
-; GFX678: ; %bb.0:
-; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 11
-; GFX678-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: s_set_rounding_neg1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 11
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1011-LABEL: s_set_rounding_neg1:
-; GFX1011: ; %bb.0:
-; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT: s_round_mode 0xb
-; GFX1011-NEXT: s_setpc_b64 s[30:31]
- call void @llvm.set.rounding(i32 -1)
- ret void
-}
-
-; --------------------------------------------------------------------
-; Test extended values
-; --------------------------------------------------------------------
-
-; NearestTiesToEvenF32_TowardPositiveF64 = 8
-define void @s_set_rounding_8() {
-; GFX678-LABEL: s_set_rounding_8:
-; GFX678: ; %bb.0:
-; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 4
-; GFX678-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: s_set_rounding_8:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 4
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1011-LABEL: s_set_rounding_8:
-; GFX1011: ; %bb.0:
-; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT: s_round_mode 0x4
-; GFX1011-NEXT: s_setpc_b64 s[30:31]
- call void @llvm.set.rounding(i32 8)
- ret void
-}
-
-; NearestTiesToEvenF32_TowardNegativeF64 = 9
-define void @s_set_rounding_9() {
-; GFX678-LABEL: s_set_rounding_9:
-; GFX678: ; %bb.0:
-; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 8
-; GFX678-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: s_set_rounding_9:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 8
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1011-LABEL: s_set_rounding_9:
-; GFX1011: ; %bb.0:
-; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT: s_round_mode 0x8
-; GFX1011-NEXT: s_setpc_b64 s[30:31]
- call void @llvm.set.rounding(i32 9)
- ret void
-}
-
-; NearestTiesToEvenF32_TowardZeroF64 = 10
-define void @s_set_rounding_10() {
-; GFX678-LABEL: s_set_rounding_10:
-; GFX678: ; %bb.0:
-; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 12
-; GFX678-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: s_set_rounding_10:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 12
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1011-LABEL: s_set_rounding_10:
-; GFX1011: ; %bb.0:
-; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT: s_round_mode 0xc
-; GFX1011-NEXT: s_setpc_b64 s[30:31]
- call void @llvm.set.rounding(i32 10)
- ret void
-}
-
-; TowardPositiveF32_NearestTiesToEvenF64 = 11
-define void @s_set_rounding_11() {
-; GFX678-LABEL: s_set_rounding_11:
-; GFX678: ; %bb.0:
-; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 1
-; GFX678-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: s_set_rounding_11:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1011-LABEL: s_set_rounding_11:
-; GFX1011: ; %bb.0:
-; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT: s_round_mode 0x1
-; GFX1011-NEXT: s_setpc_b64 s[30:31]
- call void @llvm.set.rounding(i32 11)
- ret void
-}
-
-; TowardPositiveF32_TowardNegativeF64 = 12
-define void @s_set_rounding_12() {
-; GFX678-LABEL: s_set_rounding_12:
-; GFX678: ; %bb.0:
-; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 9
-; GFX678-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: s_set_rounding_12:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 9
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1011-LABEL: s_set_rounding_12:
-; GFX1011: ; %bb.0:
-; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT: s_round_mode 0x9
-; GFX1011-NEXT: s_setpc_b64 s[30:31]
- call void @llvm.set.rounding(i32 12)
- ret void
-}
-
-; TowardPositiveF32_TowardZeroF64 = 13
-define void @s_set_rounding_13() {
-; GFX678-LABEL: s_set_rounding_13:
-; GFX678: ; %bb.0:
-; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 13
-; GFX678-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: s_set_rounding_13:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 13
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1011-LABEL: s_set_rounding_13:
-; GFX1011: ; %bb.0:
-; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT: s_round_mode 0xd
-; GFX1011-NEXT: s_setpc_b64 s[30:31]
- call void @llvm.set.rounding(i32 13)
- ret void
-}
-
-; TowardNegativeF32_NearestTiesToEvenF64 = 14
-define void @s_set_rounding_14() {
-; GFX678-LABEL: s_set_rounding_14:
-; GFX678: ; %bb.0:
-; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 2
-; GFX678-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: s_set_rounding_14:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 2
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1011-LABEL: s_set_rounding_14:
-; GFX1011: ; %bb.0:
-; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT: s_round_mode 0x2
-; GFX1011-NEXT: s_setpc_b64 s[30:31]
- call void @llvm.set.rounding(i32 14)
- ret void
-}
-
-; TowardNegativeF32_TowardPositiveF64 = 15
-define void @s_set_rounding_15() {
-; GFX678-LABEL: s_set_rounding_15:
-; GFX678: ; %bb.0:
-; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 6
-; GFX678-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: s_set_rounding_15:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 6
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1011-LABEL: s_set_rounding_15:
-; GFX1011: ; %bb.0:
-; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT: s_round_mode 0x6
-; GFX1011-NEXT: s_setpc_b64 s[30:31]
- call void @llvm.set.rounding(i32 15)
- ret void
-}
-
-
-; TowardNegativeF32_TowardZeroF64 = 16
-define void @s_set_rounding_16() {
-; GFX678-LABEL: s_set_rounding_16:
-; GFX678: ; %bb.0:
-; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 14
-; GFX678-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: s_set_rounding_16:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 14
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1011-LABEL: s_set_rounding_16:
-; GFX1011: ; %bb.0:
-; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT: s_round_mode 0xe
-; GFX1011-NEXT: s_setpc_b64 s[30:31]
- call void @llvm.set.rounding(i32 16)
- ret void
-}
-
-; TowardZeroF32_NearestTiesToEvenF64 = 17
-define void @s_set_rounding_17() {
-; GFX678-LABEL: s_set_rounding_17:
-; GFX678: ; %bb.0:
-; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 3
-; GFX678-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: s_set_rounding_17:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 3
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1011-LABEL: s_set_rounding_17:
-; GFX1011: ; %bb.0:
-; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT: s_round_mode 0x3
-; GFX1011-NEXT: s_setpc_b64 s[30:31]
- call void @llvm.set.rounding(i32 17)
- ret void
-}
-
-; TowardZeroF32_TowardPositiveF64 = 18
-define void @s_set_rounding_18() {
-; GFX678-LABEL: s_set_rounding_18:
-; GFX678: ; %bb.0:
-; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 7
-; GFX678-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: s_set_rounding_18:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 7
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1011-LABEL: s_set_rounding_18:
-; GFX1011: ; %bb.0:
-; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT: s_round_mode 0x7
-; GFX1011-NEXT: s_setpc_b64 s[30:31]
- call void @llvm.set.rounding(i32 18)
- ret void
-}
-
-; TowardZeroF32_TowardNegativeF64 = 19,
-define void @s_set_rounding_19() {
-; GFX678-LABEL: s_set_rounding_19:
-; GFX678: ; %bb.0:
-; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 11
-; GFX678-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: s_set_rounding_19:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 11
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1011-LABEL: s_set_rounding_19:
-; GFX1011: ; %bb.0:
-; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT: s_round_mode 0xb
-; GFX1011-NEXT: s_setpc_b64 s[30:31]
- call void @llvm.set.rounding(i32 19)
- ret void
-}
-
-; Invalid, out of bounds
-define void @s_set_rounding_20() {
-; GFX678-LABEL: s_set_rounding_20:
-; GFX678: ; %bb.0:
-; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 11
-; GFX678-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: s_set_rounding_20:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 11
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1011-LABEL: s_set_rounding_20:
-; GFX1011: ; %bb.0:
-; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT: s_round_mode 0xb
-; GFX1011-NEXT: s_setpc_b64 s[30:31]
- call void @llvm.set.rounding(i32 20)
- ret void
-}
-
-define void @s_set_rounding_0xffff() {
-; GFX678-LABEL: s_set_rounding_0xffff:
-; GFX678: ; %bb.0:
-; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 11
-; GFX678-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: s_set_rounding_0xffff:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 11
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1011-LABEL: s_set_rounding_0xffff:
-; GFX1011: ; %bb.0:
-; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT: s_round_mode 0xb
-; GFX1011-NEXT: s_setpc_b64 s[30:31]
- call void @llvm.set.rounding(i32 65535)
- ret void
-}
-
-; --------------------------------------------------------------------
-; Test optimization knowing the value can only be in the standard
-; range
-; --------------------------------------------------------------------
-
-define amdgpu_gfx void @s_set_rounding_i2_zeroext(i2 zeroext inreg %rounding) {
-; GFX6-LABEL: s_set_rounding_i2_zeroext:
-; GFX6: ; %bb.0:
-; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT: s_lshl_b32 s34, s4, 2
-; GFX6-NEXT: s_lshr_b32 s34, 0xa50f, s34
-; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
-; GFX6-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX7-LABEL: s_set_rounding_i2_zeroext:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: s_lshl_b32 s34, s4, 2
-; GFX7-NEXT: s_lshr_b32 s34, 0xa50f, s34
-; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: s_set_rounding_i2_zeroext:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: s_and_b32 s34, 0xffff, s4
-; GFX8-NEXT: s_lshl_b32 s34, s34, 2
-; GFX8-NEXT: s_lshr_b32 s34, 0xa50f, s34
-; GFX8-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: s_set_rounding_i2_zeroext:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_and_b32 s34, 0xffff, s4
-; GFX9-NEXT: s_lshl_b32 s34, s34, 2
-; GFX9-NEXT: s_lshr_b32 s34, 0xa50f, s34
-; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: s_set_rounding_i2_zeroext:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_and_b32 s34, 0xffff, s4
-; GFX10-NEXT: s_lshl_b32 s34, s34, 2
-; GFX10-NEXT: s_lshr_b32 s34, 0xa50f, s34
-; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: s_set_rounding_i2_zeroext:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_and_b32 s0, 0xffff, s4
-; GFX11-NEXT: s_lshl_b32 s0, s0, 2
-; GFX11-NEXT: s_lshr_b32 s0, 0xa50f, s0
-; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
-; GFX11-NEXT: s_setpc_b64 s[30:31]
- %zext.rounding = zext i2 %rounding to i32
- call void @llvm.set.rounding(i32 %zext.rounding)
- ret void
-}
-
-define amdgpu_gfx void @s_set_rounding_i2_signext(i2 signext inreg %rounding) {
-; GFX6-LABEL: s_set_rounding_i2_signext:
-; GFX6: ; %bb.0:
-; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT: s_add_i32 s34, s4, -4
-; GFX6-NEXT: s_min_u32 s34, s4, s34
-; GFX6-NEXT: s_lshl_b32 s36, s34, 2
-; GFX6-NEXT: s_mov_b32 s34, 0x1c84a50f
-; GFX6-NEXT: s_mov_b32 s35, 0xb73e62d9
-; GFX6-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
-; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
-; GFX6-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX7-LABEL: s_set_rounding_i2_signext:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: s_add_i32 s34, s4, -4
-; GFX7-NEXT: s_min_u32 s34, s4, s34
-; GFX7-NEXT: s_lshl_b32 s36, s34, 2
-; GFX7-NEXT: s_mov_b32 s34, 0x1c84a50f
-; GFX7-NEXT: s_mov_b32 s35, 0xb73e62d9
-; GFX7-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
-; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: s_set_rounding_i2_signext:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: s_sext_i32_i16 s34, s4
-; GFX8-NEXT: s_add_i32 s35, s34, -4
-; GFX8-NEXT: s_min_u32 s34, s34, s35
-; GFX8-NEXT: s_lshl_b32 s36, s34, 2
-; GFX8-NEXT: s_mov_b32 s34, 0x1c84a50f
-; GFX8-NEXT: s_mov_b32 s35, 0xb73e62d9
-; GFX8-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
-; GFX8-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: s_set_rounding_i2_signext:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_sext_i32_i16 s34, s4
-; GFX9-NEXT: s_add_i32 s35, s34, -4
-; GFX9-NEXT: s_min_u32 s34, s34, s35
-; GFX9-NEXT: s_lshl_b32 s36, s34, 2
-; GFX9-NEXT: s_mov_b32 s34, 0x1c84a50f
-; GFX9-NEXT: s_mov_b32 s35, 0xb73e62d9
-; GFX9-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
-; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: s_set_rounding_i2_signext:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_sext_i32_i16 s34, s4
-; GFX10-NEXT: s_add_i32 s35, s34, -4
-; GFX10-NEXT: s_min_u32 s34, s34, s35
-; GFX10-NEXT: s_lshl_b32 s36, s34, 2
-; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f
-; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9
-; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
-; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: s_set_rounding_i2_signext:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_sext_i32_i16 s0, s4
-; GFX11-NEXT: s_add_i32 s1, s0, -4
-; GFX11-NEXT: s_min_u32 s0, s0, s1
-; GFX11-NEXT: s_lshl_b32 s2, s0, 2
-; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
-; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
-; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
-; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
-; GFX11-NEXT: s_setpc_b64 s[30:31]
- %sext.rounding = sext i2 %rounding to i32
- call void @llvm.set.rounding(i32 %sext.rounding)
- ret void
-}
-
-define amdgpu_gfx void @s_set_rounding_i3_signext(i3 signext inreg %rounding) {
-; GFX6-LABEL: s_set_rounding_i3_signext:
-; GFX6: ; %bb.0:
-; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT: s_add_i32 s34, s4, -4
-; GFX6-NEXT: s_min_u32 s34, s4, s34
-; GFX6-NEXT: s_lshl_b32 s36, s34, 2
-; GFX6-NEXT: s_mov_b32 s34, 0x1c84a50f
-; GFX6-NEXT: s_mov_b32 s35, 0xb73e62d9
-; GFX6-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
-; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
-; GFX6-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX7-LABEL: s_set_rounding_i3_signext:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: s_add_i32 s34, s4, -4
-; GFX7-NEXT: s_min_u32 s34, s4, s34
-; GFX7-NEXT: s_lshl_b32 s36, s34, 2
-; GFX7-NEXT: s_mov_b32 s34, 0x1c84a50f
-; GFX7-NEXT: s_mov_b32 s35, 0xb73e62d9
-; GFX7-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
-; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: s_set_rounding_i3_signext:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: s_sext_i32_i16 s34, s4
-; GFX8-NEXT: s_add_i32 s35, s34, -4
-; GFX8-NEXT: s_min_u32 s34, s34, s35
-; GFX8-NEXT: s_lshl_b32 s36, s34, 2
-; GFX8-NEXT: s_mov_b32 s34, 0x1c84a50f
-; GFX8-NEXT: s_mov_b32 s35, 0xb73e62d9
-; GFX8-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
-; GFX8-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: s_set_rounding_i3_signext:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_sext_i32_i16 s34, s4
-; GFX9-NEXT: s_add_i32 s35, s34, -4
-; GFX9-NEXT: s_min_u32 s34, s34, s35
-; GFX9-NEXT: s_lshl_b32 s36, s34, 2
-; GFX9-NEXT: s_mov_b32 s34, 0x1c84a50f
-; GFX9-NEXT: s_mov_b32 s35, 0xb73e62d9
-; GFX9-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
-; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: s_set_rounding_i3_signext:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_sext_i32_i16 s34, s4
-; GFX10-NEXT: s_add_i32 s35, s34, -4
-; GFX10-NEXT: s_min_u32 s34, s34, s35
-; GFX10-NEXT: s_lshl_b32 s36, s34, 2
-; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f
-; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9
-; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
-; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: s_set_rounding_i3_signext:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_sext_i32_i16 s0, s4
-; GFX11-NEXT: s_add_i32 s1, s0, -4
-; GFX11-NEXT: s_min_u32 s0, s0, s1
-; GFX11-NEXT: s_lshl_b32 s2, s0, 2
-; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
-; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
-; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
-; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
-; GFX11-NEXT: s_setpc_b64 s[30:31]
- %sext.rounding = sext i3 %rounding to i32
- call void @llvm.set.rounding(i32 %sext.rounding)
- ret void
-}
-
-define amdgpu_gfx void @s_set_rounding_i3_zeroext(i3 zeroext inreg %rounding) {
-; GFX6-LABEL: s_set_rounding_i3_zeroext:
-; GFX6: ; %bb.0:
-; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT: s_add_i32 s34, s4, -4
-; GFX6-NEXT: s_min_u32 s34, s4, s34
-; GFX6-NEXT: s_lshl_b32 s36, s34, 2
-; GFX6-NEXT: s_mov_b32 s34, 0x1c84a50f
-; GFX6-NEXT: s_mov_b32 s35, 0xb73e62d9
-; GFX6-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
-; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
-; GFX6-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX7-LABEL: s_set_rounding_i3_zeroext:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: s_add_i32 s34, s4, -4
-; GFX7-NEXT: s_min_u32 s34, s4, s34
-; GFX7-NEXT: s_lshl_b32 s36, s34, 2
-; GFX7-NEXT: s_mov_b32 s34, 0x1c84a50f
-; GFX7-NEXT: s_mov_b32 s35, 0xb73e62d9
-; GFX7-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
-; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: s_set_rounding_i3_zeroext:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: s_and_b32 s34, 0xffff, s4
-; GFX8-NEXT: s_add_i32 s35, s34, -4
-; GFX8-NEXT: s_min_u32 s34, s34, s35
-; GFX8-NEXT: s_lshl_b32 s36, s34, 2
-; GFX8-NEXT: s_mov_b32 s34, 0x1c84a50f
-; GFX8-NEXT: s_mov_b32 s35, 0xb73e62d9
-; GFX8-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
-; GFX8-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: s_set_rounding_i3_zeroext:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_and_b32 s34, 0xffff, s4
-; GFX9-NEXT: s_add_i32 s35, s34, -4
-; GFX9-NEXT: s_min_u32 s34, s34, s35
-; GFX9-NEXT: s_lshl_b32 s36, s34, 2
-; GFX9-NEXT: s_mov_b32 s34, 0x1c84a50f
-; GFX9-NEXT: s_mov_b32 s35, 0xb73e62d9
-; GFX9-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
-; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: s_set_rounding_i3_zeroext:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_and_b32 s34, 0xffff, s4
-; GFX10-NEXT: s_add_i32 s35, s34, -4
-; GFX10-NEXT: s_min_u32 s34, s34, s35
-; GFX10-NEXT: s_lshl_b32 s36, s34, 2
-; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f
-; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9
-; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
-; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: s_set_rounding_i3_zeroext:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_and_b32 s0, 0xffff, s4
-; GFX11-NEXT: s_add_i32 s1, s0, -4
-; GFX11-NEXT: s_min_u32 s0, s0, s1
-; GFX11-NEXT: s_lshl_b32 s2, s0, 2
-; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
-; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
-; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
-; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
-; GFX11-NEXT: s_setpc_b64 s[30:31]
- %sext.rounding = zext i3 %rounding to i32
- call void @llvm.set.rounding(i32 %sext.rounding)
- ret void
-}
-
-define amdgpu_gfx void @s_set_rounding_select_0_1(i32 inreg %cond) {
-; GFX6-LABEL: s_set_rounding_select_0_1:
-; GFX6: ; %bb.0:
-; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT: s_cmp_lg_u32 s4, 0
-; GFX6-NEXT: s_cselect_b64 s[34:35], -1, 0
-; GFX6-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[34:35]
-; GFX6-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX6-NEXT: v_lshr_b32_e32 v0, 0xa50f, v0
-; GFX6-NEXT: v_readfirstlane_b32 s34, v0
-; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
-; GFX6-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX7-LABEL: s_set_rounding_select_0_1:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: s_cmp_lg_u32 s4, 0
-; GFX7-NEXT: s_cselect_b64 s[34:35], -1, 0
-; GFX7-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[34:35]
-; GFX7-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX7-NEXT: v_lshr_b32_e32 v0, 0xa50f, v0
-; GFX7-NEXT: v_readfirstlane_b32 s34, v0
-; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: s_set_rounding_select_0_1:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: s_cmp_lg_u32 s4, 0
-; GFX8-NEXT: s_cselect_b64 s[34:35], -1, 0
-; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[34:35]
-; GFX8-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX8-NEXT: s_mov_b32 s34, 0xa50f
-; GFX8-NEXT: v_lshrrev_b32_e64 v0, v0, s34
-; GFX8-NEXT: v_readfirstlane_b32 s34, v0
-; GFX8-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: s_set_rounding_select_0_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_cmp_lg_u32 s4, 0
-; GFX9-NEXT: s_cselect_b64 s[34:35], -1, 0
-; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[34:35]
-; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT: s_mov_b32 s34, 0xa50f
-; GFX9-NEXT: v_lshrrev_b32_e64 v0, v0, s34
-; GFX9-NEXT: v_readfirstlane_b32 s34, v0
-; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: s_set_rounding_select_0_1:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_cmp_lg_u32 s4, 0
-; GFX10-NEXT: s_cselect_b32 s34, -1, 0
-; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, s34
-; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-NEXT: v_lshrrev_b32_e64 v0, v0, 0xa50f
-; GFX10-NEXT: v_readfirstlane_b32 s34, v0
-; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: s_set_rounding_select_0_1:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_cmp_lg_u32 s4, 0
-; GFX11-NEXT: s_cselect_b32 s0, -1, 0
-; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-NEXT: v_lshrrev_b32_e64 v0, v0, 0xa50f
-; GFX11-NEXT: v_readfirstlane_b32 s0, v0
-; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
-; GFX11-NEXT: s_setpc_b64 s[30:31]
- %cmp = icmp eq i32 %cond, 0
- %rounding = select i1 %cmp, i32 0, i32 1
- call void @llvm.set.rounding(i32 %rounding)
- ret void
-}
-
-define amdgpu_gfx void @s_set_rounding_select_1_3(i32 inreg %cond) {
-; GFX678-LABEL: s_set_rounding_select_1_3:
-; GFX678: ; %bb.0:
-; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX678-NEXT: s_cmp_eq_u32 s4, 0
-; GFX678-NEXT: s_cselect_b32 s34, 0xa50, 10
-; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
-; GFX678-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: s_set_rounding_select_1_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_cmp_eq_u32 s4, 0
-; GFX9-NEXT: s_cselect_b32 s34, 0xa50, 10
-; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: s_set_rounding_select_1_3:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_cmp_eq_u32 s4, 0
-; GFX10-NEXT: s_cselect_b32 s34, 0xa50, 10
-; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: s_set_rounding_select_1_3:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_cmp_eq_u32 s4, 0
-; GFX11-NEXT: s_cselect_b32 s0, 0xa50, 10
-; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
-; GFX11-NEXT: s_setpc_b64 s[30:31]
- %cmp = icmp eq i32 %cond, 0
- %rounding = select i1 %cmp, i32 1, i32 3
- call void @llvm.set.rounding(i32 %rounding)
- ret void
-}
-
-define void @v_set_rounding_select_1_3(i32 %cond) {
-; GFX678-LABEL: v_set_rounding_select_1_3:
-; GFX678: ; %bb.0:
-; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX678-NEXT: v_mov_b32_e32 v1, 0xa50
-; GFX678-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX678-NEXT: v_cndmask_b32_e32 v0, 10, v1, vcc
-; GFX678-NEXT: v_readfirstlane_b32 s4, v0
-; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4
-; GFX678-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: v_set_rounding_select_1_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v1, 0xa50
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v0, 10, v1, vcc
-; GFX9-NEXT: v_readfirstlane_b32 s4, v0
-; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_set_rounding_select_1_3:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX10-NEXT: v_cndmask_b32_e64 v0, 10, 0xa50, vcc_lo
-; GFX10-NEXT: v_readfirstlane_b32 s4, v0
-; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: v_set_rounding_select_1_3:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT: v_cndmask_b32_e64 v0, 10, 0xa50, vcc_lo
-; GFX11-NEXT: v_readfirstlane_b32 s0, v0
-; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
-; GFX11-NEXT: s_setpc_b64 s[30:31]
- %cmp = icmp eq i32 %cond, 0
- %rounding = select i1 %cmp, i32 1, i32 3
- call void @llvm.set.rounding(i32 %rounding)
- ret void
-}
-
-define amdgpu_gfx void @s_set_rounding_select_2_0(i32 inreg %cond) {
-; GFX6-LABEL: s_set_rounding_select_2_0:
-; GFX6: ; %bb.0:
-; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT: s_cmp_eq_u32 s4, 0
-; GFX6-NEXT: s_cselect_b64 s[34:35], -1, 0
-; GFX6-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[34:35]
-; GFX6-NEXT: v_lshlrev_b32_e32 v0, 3, v0
-; GFX6-NEXT: v_lshr_b32_e32 v0, 0xa50f, v0
-; GFX6-NEXT: v_readfirstlane_b32 s34, v0
-; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
-; GFX6-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX7-LABEL: s_set_rounding_select_2_0:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: s_cmp_eq_u32 s4, 0
-; GFX7-NEXT: s_cselect_b64 s[34:35], -1, 0
-; GFX7-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[34:35]
-; GFX7-NEXT: v_lshlrev_b32_e32 v0, 3, v0
-; GFX7-NEXT: v_lshr_b32_e32 v0, 0xa50f, v0
-; GFX7-NEXT: v_readfirstlane_b32 s34, v0
-; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: s_set_rounding_select_2_0:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: s_cmp_eq_u32 s4, 0
-; GFX8-NEXT: s_cselect_b64 s[34:35], -1, 0
-; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[34:35]
-; GFX8-NEXT: v_lshlrev_b32_e32 v0, 3, v0
-; GFX8-NEXT: s_mov_b32 s34, 0xa50f
-; GFX8-NEXT: v_lshrrev_b32_e64 v0, v0, s34
-; GFX8-NEXT: v_readfirstlane_b32 s34, v0
-; GFX8-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: s_set_rounding_select_2_0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_cmp_eq_u32 s4, 0
-; GFX9-NEXT: s_cselect_b64 s[34:35], -1, 0
-; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[34:35]
-; GFX9-NEXT: v_lshlrev_b32_e32 v0, 3, v0
-; GFX9-NEXT: s_mov_b32 s34, 0xa50f
-; GFX9-NEXT: v_lshrrev_b32_e64 v0, v0, s34
-; GFX9-NEXT: v_readfirstlane_b32 s34, v0
-; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: s_set_rounding_select_2_0:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_cmp_eq_u32 s4, 0
-; GFX10-NEXT: s_cselect_b32 s34, -1, 0
-; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, s34
-; GFX10-NEXT: v_lshlrev_b32_e32 v0, 3, v0
-; GFX10-NEXT: v_lshrrev_b32_e64 v0, v0, 0xa50f
-; GFX10-NEXT: v_readfirstlane_b32 s34, v0
-; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: s_set_rounding_select_2_0:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_cmp_eq_u32 s4, 0
-; GFX11-NEXT: s_cselect_b32 s0, -1, 0
-; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11-NEXT: v_lshlrev_b32_e32 v0, 3, v0
-; GFX11-NEXT: v_lshrrev_b32_e64 v0, v0, 0xa50f
-; GFX11-NEXT: v_readfirstlane_b32 s0, v0
-; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
-; GFX11-NEXT: s_setpc_b64 s[30:31]
- %cmp = icmp eq i32 %cond, 0
- %rounding = select i1 %cmp, i32 2, i32 0
- call void @llvm.set.rounding(i32 %rounding)
- ret void
-}
-
-define amdgpu_gfx void @s_set_rounding_select_2_1(i32 inreg %cond) {
-; GFX678-LABEL: s_set_rounding_select_2_1:
-; GFX678: ; %bb.0:
-; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX678-NEXT: s_cmp_eq_u32 s4, 0
-; GFX678-NEXT: s_movk_i32 s34, 0xa5
-; GFX678-NEXT: s_cselect_b32 s34, s34, 0xa50
-; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
-; GFX678-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: s_set_rounding_select_2_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_cmp_eq_u32 s4, 0
-; GFX9-NEXT: s_movk_i32 s34, 0xa5
-; GFX9-NEXT: s_cselect_b32 s34, s34, 0xa50
-; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: s_set_rounding_select_2_1:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_cmp_eq_u32 s4, 0
-; GFX10-NEXT: s_movk_i32 s34, 0xa5
-; GFX10-NEXT: s_cselect_b32 s34, s34, 0xa50
-; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: s_set_rounding_select_2_1:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_cmp_eq_u32 s4, 0
-; GFX11-NEXT: s_movk_i32 s0, 0xa5
-; GFX11-NEXT: s_cselect_b32 s0, s0, 0xa50
-; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
-; GFX11-NEXT: s_setpc_b64 s[30:31]
- %cmp = icmp eq i32 %cond, 0
- %rounding = select i1 %cmp, i32 2, i32 1
- call void @llvm.set.rounding(i32 %rounding)
- ret void
-}
-
-define amdgpu_gfx void @s_set_rounding_select_1_2(i32 inreg %cond) {
-; GFX678-LABEL: s_set_rounding_select_1_2:
-; GFX678: ; %bb.0:
-; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX678-NEXT: s_cmp_eq_u32 s4, 0
-; GFX678-NEXT: s_movk_i32 s34, 0xa50
-; GFX678-NEXT: s_cselect_b32 s34, s34, 0xa5
-; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
-; GFX678-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: s_set_rounding_select_1_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_cmp_eq_u32 s4, 0
-; GFX9-NEXT: s_movk_i32 s34, 0xa50
-; GFX9-NEXT: s_cselect_b32 s34, s34, 0xa5
-; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: s_set_rounding_select_1_2:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_cmp_eq_u32 s4, 0
-; GFX10-NEXT: s_movk_i32 s34, 0xa50
-; GFX10-NEXT: s_cselect_b32 s34, s34, 0xa5
-; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: s_set_rounding_select_1_2:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_cmp_eq_u32 s4, 0
-; GFX11-NEXT: s_movk_i32 s0, 0xa50
-; GFX11-NEXT: s_cselect_b32 s0, s0, 0xa5
-; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
-; GFX11-NEXT: s_setpc_b64 s[30:31]
- %cmp = icmp eq i32 %cond, 0
- %rounding = select i1 %cmp, i32 1, i32 2
- call void @llvm.set.rounding(i32 %rounding)
- ret void
-}
-
-define amdgpu_gfx void @s_set_rounding_select_3_0(i32 inreg %cond) {
-; GFX678-LABEL: s_set_rounding_select_3_0:
-; GFX678: ; %bb.0:
-; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX678-NEXT: s_cmp_eq_u32 s4, 0
-; GFX678-NEXT: s_cselect_b32 s34, 10, 0xa50f
-; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
-; GFX678-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: s_set_rounding_select_3_0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_cmp_eq_u32 s4, 0
-; GFX9-NEXT: s_cselect_b32 s34, 10, 0xa50f
-; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: s_set_rounding_select_3_0:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_cmp_eq_u32 s4, 0
-; GFX10-NEXT: s_cselect_b32 s34, 10, 0xa50f
-; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: s_set_rounding_select_3_0:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_cmp_eq_u32 s4, 0
-; GFX11-NEXT: s_cselect_b32 s0, 10, 0xa50f
-; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
-; GFX11-NEXT: s_setpc_b64 s[30:31]
- %cmp = icmp eq i32 %cond, 0
- %rounding = select i1 %cmp, i32 3, i32 0
- call void @llvm.set.rounding(i32 %rounding)
- ret void
-}
-
-define amdgpu_gfx void @s_set_rounding_select_4_0(i32 inreg %cond) {
-; GFX678-LABEL: s_set_rounding_select_4_0:
-; GFX678: ; %bb.0:
-; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX678-NEXT: s_cmp_eq_u32 s4, 0
-; GFX678-NEXT: s_cselect_b64 s[34:35], -1, 0
-; GFX678-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[34:35]
-; GFX678-NEXT: v_readfirstlane_b32 s34, v0
-; GFX678-NEXT: s_lshl_b32 s34, s34, 2
-; GFX678-NEXT: s_add_i32 s35, s34, -4
-; GFX678-NEXT: s_min_u32 s34, s34, s35
-; GFX678-NEXT: s_lshl_b32 s36, s34, 2
-; GFX678-NEXT: s_mov_b32 s34, 0x1c84a50f
-; GFX678-NEXT: s_mov_b32 s35, 0xb73e62d9
-; GFX678-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
-; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
-; GFX678-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: s_set_rounding_select_4_0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_cmp_eq_u32 s4, 0
-; GFX9-NEXT: s_cselect_b64 s[34:35], -1, 0
-; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[34:35]
-; GFX9-NEXT: v_readfirstlane_b32 s34, v0
-; GFX9-NEXT: s_lshl_b32 s34, s34, 2
-; GFX9-NEXT: s_add_i32 s35, s34, -4
-; GFX9-NEXT: s_min_u32 s34, s34, s35
-; GFX9-NEXT: s_lshl_b32 s36, s34, 2
-; GFX9-NEXT: s_mov_b32 s34, 0x1c84a50f
-; GFX9-NEXT: s_mov_b32 s35, 0xb73e62d9
-; GFX9-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
-; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: s_set_rounding_select_4_0:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_cmp_eq_u32 s4, 0
-; GFX10-NEXT: s_cselect_b32 s34, -1, 0
-; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, s34
-; GFX10-NEXT: v_readfirstlane_b32 s34, v0
-; GFX10-NEXT: s_lshl_b32 s34, s34, 2
-; GFX10-NEXT: s_add_i32 s35, s34, -4
-; GFX10-NEXT: s_min_u32 s34, s34, s35
-; GFX10-NEXT: s_lshl_b32 s36, s34, 2
-; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f
-; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9
-; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
-; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: s_set_rounding_select_4_0:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_cmp_eq_u32 s4, 0
-; GFX11-NEXT: s_cselect_b32 s0, -1, 0
-; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11-NEXT: v_readfirstlane_b32 s0, v0
-; GFX11-NEXT: s_lshl_b32 s0, s0, 2
-; GFX11-NEXT: s_add_i32 s1, s0, -4
-; GFX11-NEXT: s_min_u32 s0, s0, s1
-; GFX11-NEXT: s_lshl_b32 s2, s0, 2
-; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
-; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
-; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
-; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
-; GFX11-NEXT: s_setpc_b64 s[30:31]
- %cmp = icmp eq i32 %cond, 0
- %rounding = select i1 %cmp, i32 4, i32 0
- call void @llvm.set.rounding(i32 %rounding)
- ret void
-}
-
-define amdgpu_gfx void @s_set_rounding_select_3_5(i32 inreg %cond) {
-; GFX678-LABEL: s_set_rounding_select_3_5:
-; GFX678: ; %bb.0:
-; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX678-NEXT: s_cmp_eq_u32 s4, 0
-; GFX678-NEXT: s_cselect_b32 s34, 3, 5
-; GFX678-NEXT: s_add_i32 s35, s34, -4
-; GFX678-NEXT: s_min_u32 s34, s34, s35
-; GFX678-NEXT: s_lshl_b32 s36, s34, 2
-; GFX678-NEXT: s_mov_b32 s34, 0x1c84a50f
-; GFX678-NEXT: s_mov_b32 s35, 0xb73e62d9
-; GFX678-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
-; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
-; GFX678-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: s_set_rounding_select_3_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_cmp_eq_u32 s4, 0
-; GFX9-NEXT: s_cselect_b32 s34, 3, 5
-; GFX9-NEXT: s_add_i32 s35, s34, -4
-; GFX9-NEXT: s_min_u32 s34, s34, s35
-; GFX9-NEXT: s_lshl_b32 s36, s34, 2
-; GFX9-NEXT: s_mov_b32 s34, 0x1c84a50f
-; GFX9-NEXT: s_mov_b32 s35, 0xb73e62d9
-; GFX9-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
-; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: s_set_rounding_select_3_5:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_cmp_eq_u32 s4, 0
-; GFX10-NEXT: s_cselect_b32 s34, 3, 5
-; GFX10-NEXT: s_add_i32 s35, s34, -4
-; GFX10-NEXT: s_min_u32 s34, s34, s35
-; GFX10-NEXT: s_lshl_b32 s36, s34, 2
-; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f
-; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9
-; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
-; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: s_set_rounding_select_3_5:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_cmp_eq_u32 s4, 0
-; GFX11-NEXT: s_cselect_b32 s0, 3, 5
-; GFX11-NEXT: s_add_i32 s1, s0, -4
-; GFX11-NEXT: s_min_u32 s0, s0, s1
-; GFX11-NEXT: s_lshl_b32 s2, s0, 2
-; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
-; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
-; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
-; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
-; GFX11-NEXT: s_setpc_b64 s[30:31]
- %cmp = icmp eq i32 %cond, 0
- %rounding = select i1 %cmp, i32 3, i32 5
- call void @llvm.set.rounding(i32 %rounding)
- ret void
-}
-
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; GCN: {{.*}}
More information about the llvm-commits
mailing list