[llvm] AMDGPU: Optimize set_rounding if input is known to fit in 2 bits (PR #88588)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 12 15:52:56 PDT 2024
github-actions[bot] wrote:
<!--LLVM CODE FORMAT COMMENT: {clang-format}-->
:warning: C/C++ code formatter, clang-format found issues in your code. :warning:
<details>
<summary>
You can test this locally with the following command:
</summary>
``````````bash
git-clang-format --diff 4078afc6d23e25df6baedad61b224ef86a94d42f c7c8da41d647921f8416ed6128d9cd9766759d7b -- llvm/lib/Target/AMDGPU/SIISelLowering.cpp llvm/lib/Target/AMDGPU/SIISelLowering.h llvm/lib/Target/AMDGPU/SIModeRegisterDefaults.cpp llvm/lib/Target/AMDGPU/SIModeRegisterDefaults.h
``````````
</details>
<details>
<summary>
View the diff from clang-format here.
</summary>
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index a76481bb72..38c766404f 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -4067,11 +4067,11 @@ SDValue SITargetLowering::lowerSET_ROUNDING(SDValue Op,
// Index a table of 4-bit entries mapping from the C FLT_ROUNDS values to the
// hardware MODE.fp_round values.
if (auto *ConstMode = dyn_cast<ConstantSDNode>(NewMode)) {
- uint32_t ClampedVal = std::min(
- static_cast<uint32_t>(ConstMode->getZExtValue()),
- static_cast<uint32_t>(AMDGPU::TowardZeroF32_TowardNegativeF64));
- NewMode = DAG.getConstant(
- AMDGPU::decodeFltRoundToHWConversionTable(ClampedVal), SL, MVT::i32);
+ uint32_t ClampedVal = std::min(
+ static_cast<uint32_t>(ConstMode->getZExtValue()),
+ static_cast<uint32_t>(AMDGPU::TowardZeroF32_TowardNegativeF64));
+ NewMode = DAG.getConstant(
+ AMDGPU::decodeFltRoundToHWConversionTable(ClampedVal), SL, MVT::i32);
} else {
// If we know the input can only be one of the supported standard modes in
// the range 0-3, we can use a simplified mapping to hardware values.
@@ -4083,14 +4083,14 @@ SDValue SITargetLowering::lowerSET_ROUNDING(SDValue Op,
if (UseReducedTable) {
// Truncate to the low 32-bits.
SDValue BitTable = DAG.getConstant(
- AMDGPU::FltRoundToHWConversionTable & 0xffff, SL, MVT::i32);
+ AMDGPU::FltRoundToHWConversionTable & 0xffff, SL, MVT::i32);
SDValue Two = DAG.getConstant(2, SL, MVT::i32);
SDValue RoundModeTimesNumBits =
- DAG.getNode(ISD::SHL, SL, MVT::i32, NewMode, Two);
+ DAG.getNode(ISD::SHL, SL, MVT::i32, NewMode, Two);
SDValue TableValue =
- DAG.getNode(ISD::SRL, SL, MVT::i32, BitTable, RoundModeTimesNumBits);
+ DAG.getNode(ISD::SRL, SL, MVT::i32, BitTable, RoundModeTimesNumBits);
NewMode = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, TableValue);
// TODO: SimplifyDemandedBits on the setreg source here can likely reduce
@@ -4100,11 +4100,11 @@ SDValue SITargetLowering::lowerSET_ROUNDING(SDValue Op,
// table_index = is_standard ? value : (value - 4)
// MODE.fp_round = (bit_table >> table_index) & 0xf
SDValue BitTable =
- DAG.getConstant(AMDGPU::FltRoundToHWConversionTable, SL, MVT::i64);
+ DAG.getConstant(AMDGPU::FltRoundToHWConversionTable, SL, MVT::i64);
SDValue Four = DAG.getConstant(4, SL, MVT::i32);
SDValue IsStandardValue =
- DAG.getSetCC(SL, MVT::i1, NewMode, Four, ISD::SETULT);
+ DAG.getSetCC(SL, MVT::i1, NewMode, Four, ISD::SETULT);
SDValue OffsetEnum = DAG.getNode(ISD::SUB, SL, MVT::i32, NewMode, Four);
SDValue IndexVal = DAG.getNode(ISD::SELECT, SL, MVT::i32, IsStandardValue,
@@ -4112,10 +4112,10 @@ SDValue SITargetLowering::lowerSET_ROUNDING(SDValue Op,
SDValue Two = DAG.getConstant(2, SL, MVT::i32);
SDValue RoundModeTimesNumBits =
- DAG.getNode(ISD::SHL, SL, MVT::i32, IndexVal, Two);
+ DAG.getNode(ISD::SHL, SL, MVT::i32, IndexVal, Two);
SDValue TableValue =
- DAG.getNode(ISD::SRL, SL, MVT::i64, BitTable, RoundModeTimesNumBits);
+ DAG.getNode(ISD::SRL, SL, MVT::i64, BitTable, RoundModeTimesNumBits);
SDValue TruncTable = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, TableValue);
// No need to mask out the high bits since the setreg will ignore them
@@ -4127,7 +4127,7 @@ SDValue SITargetLowering::lowerSET_ROUNDING(SDValue Op,
// earlier and keep more operations scalar, but that interferes with
// combining the source.
SDValue ReadFirstLaneID =
- DAG.getTargetConstant(Intrinsic::amdgcn_readfirstlane, SL, MVT::i32);
+ DAG.getTargetConstant(Intrinsic::amdgcn_readfirstlane, SL, MVT::i32);
NewMode = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SL, MVT::i32,
ReadFirstLaneID, NewMode);
}
``````````
</details>
https://github.com/llvm/llvm-project/pull/88588
More information about the llvm-commits mailing list