[llvm] AMDGPU: Optimize set_rounding if input is known to fit in 2 bits (PR #88588)

Fri Apr 12 15:52:56 PDT 2024

github-actions[bot] wrote:




:warning: The C/C++ code formatter, clang-format, found issues in your code. :warning:

<details>
<summary>
You can test this locally with the following command:
</summary>

``````````bash
git-clang-format --diff 4078afc6d23e25df6baedad61b224ef86a94d42f c7c8da41d647921f8416ed6128d9cd9766759d7b -- llvm/lib/Target/AMDGPU/SIISelLowering.cpp llvm/lib/Target/AMDGPU/SIISelLowering.h llvm/lib/Target/AMDGPU/SIModeRegisterDefaults.cpp llvm/lib/Target/AMDGPU/SIModeRegisterDefaults.h
``````````

</details>

<details>
<summary>
View the diff from clang-format here.
</summary>

``````````diff

diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index a76481bb72..38c766404f 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -4067,11 +4067,11 @@ SDValue SITargetLowering::lowerSET_ROUNDING(SDValue Op,
   // Index a table of 4-bit entries mapping from the C FLT_ROUNDS values to the
   // hardware MODE.fp_round values.
   if (auto *ConstMode = dyn_cast<ConstantSDNode>(NewMode)) {
-      uint32_t ClampedVal = std::min(
-          static_cast<uint32_t>(ConstMode->getZExtValue()),
-          static_cast<uint32_t>(AMDGPU::TowardZeroF32_TowardNegativeF64));
-      NewMode = DAG.getConstant(
-          AMDGPU::decodeFltRoundToHWConversionTable(ClampedVal), SL, MVT::i32);
+    uint32_t ClampedVal = std::min(
+        static_cast<uint32_t>(ConstMode->getZExtValue()),
+        static_cast<uint32_t>(AMDGPU::TowardZeroF32_TowardNegativeF64));
+    NewMode = DAG.getConstant(
+        AMDGPU::decodeFltRoundToHWConversionTable(ClampedVal), SL, MVT::i32);
   } else {
     // If we know the input can only be one of the supported standard modes in
     // the range 0-3, we can use a simplified mapping to hardware values.
@@ -4083,14 +4083,14 @@ SDValue SITargetLowering::lowerSET_ROUNDING(SDValue Op,
     if (UseReducedTable) {
       // Truncate to the low 32-bits.
       SDValue BitTable = DAG.getConstant(
-        AMDGPU::FltRoundToHWConversionTable & 0xffff, SL, MVT::i32);
+          AMDGPU::FltRoundToHWConversionTable & 0xffff, SL, MVT::i32);
 
       SDValue Two = DAG.getConstant(2, SL, MVT::i32);
       SDValue RoundModeTimesNumBits =
-        DAG.getNode(ISD::SHL, SL, MVT::i32, NewMode, Two);
+          DAG.getNode(ISD::SHL, SL, MVT::i32, NewMode, Two);
 
       SDValue TableValue =
-        DAG.getNode(ISD::SRL, SL, MVT::i32, BitTable, RoundModeTimesNumBits);
+          DAG.getNode(ISD::SRL, SL, MVT::i32, BitTable, RoundModeTimesNumBits);
       NewMode = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, TableValue);
 
       // TODO: SimplifyDemandedBits on the setreg source here can likely reduce
@@ -4100,11 +4100,11 @@ SDValue SITargetLowering::lowerSET_ROUNDING(SDValue Op,
       // table_index = is_standard ? value : (value - 4)
       // MODE.fp_round = (bit_table >> table_index) & 0xf
       SDValue BitTable =
-        DAG.getConstant(AMDGPU::FltRoundToHWConversionTable, SL, MVT::i64);
+          DAG.getConstant(AMDGPU::FltRoundToHWConversionTable, SL, MVT::i64);
 
       SDValue Four = DAG.getConstant(4, SL, MVT::i32);
       SDValue IsStandardValue =
-        DAG.getSetCC(SL, MVT::i1, NewMode, Four, ISD::SETULT);
+          DAG.getSetCC(SL, MVT::i1, NewMode, Four, ISD::SETULT);
       SDValue OffsetEnum = DAG.getNode(ISD::SUB, SL, MVT::i32, NewMode, Four);
 
       SDValue IndexVal = DAG.getNode(ISD::SELECT, SL, MVT::i32, IsStandardValue,
@@ -4112,10 +4112,10 @@ SDValue SITargetLowering::lowerSET_ROUNDING(SDValue Op,
 
       SDValue Two = DAG.getConstant(2, SL, MVT::i32);
       SDValue RoundModeTimesNumBits =
-        DAG.getNode(ISD::SHL, SL, MVT::i32, IndexVal, Two);
+          DAG.getNode(ISD::SHL, SL, MVT::i32, IndexVal, Two);
 
       SDValue TableValue =
-        DAG.getNode(ISD::SRL, SL, MVT::i64, BitTable, RoundModeTimesNumBits);
+          DAG.getNode(ISD::SRL, SL, MVT::i64, BitTable, RoundModeTimesNumBits);
       SDValue TruncTable = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, TableValue);
 
       // No need to mask out the high bits since the setreg will ignore them
@@ -4127,7 +4127,7 @@ SDValue SITargetLowering::lowerSET_ROUNDING(SDValue Op,
     // earlier and keep more operations scalar, but that interferes with
     // combining the source.
     SDValue ReadFirstLaneID =
-      DAG.getTargetConstant(Intrinsic::amdgcn_readfirstlane, SL, MVT::i32);
+        DAG.getTargetConstant(Intrinsic::amdgcn_readfirstlane, SL, MVT::i32);
     NewMode = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SL, MVT::i32,
                           ReadFirstLaneID, NewMode);
   }

``````````

</details>


https://github.com/llvm/llvm-project/pull/88588