[llvm] AMDGPU: Optimize set_rounding if input is known to fit in 2 bits (PR #88588)

Thu Apr 18 07:00:19 PDT 2024

================
@@ -4073,34 +4073,55 @@ SDValue SITargetLowering::lowerSET_ROUNDING(SDValue Op,
       NewMode = DAG.getConstant(
           AMDGPU::decodeFltRoundToHWConversionTable(ClampedVal), SL, MVT::i32);
   } else {
-    SDValue BitTable =
-      DAG.getConstant(AMDGPU::FltRoundToHWConversionTable, SL, MVT::i64);
-
+    // If we know the input can only be one of the supported standard modes in
+    // the range 0-3, we can use a simplified mapping to hardware values.
+    KnownBits KB = DAG.computeKnownBits(NewMode);
+    const bool UseReducedTable = KB.countMinLeadingZeros() >= 30;
     // The supported standard values are 0-3. The extended values start at 8. We
     // need to offset by 4 if the value is in the extended range.
 
-    // is_standard = value < 4;
-    // table_index = is_standard ? value : (value - 4)
-    // MODE.fp_round = (bit_table >> table_index) & 0xf
-
-    SDValue Four = DAG.getConstant(4, SL, MVT::i32);
-    SDValue IsStandardValue =
-      DAG.getSetCC(SL, MVT::i1, NewMode, Four, ISD::SETULT);
-    SDValue OffsetEnum = DAG.getNode(ISD::SUB, SL, MVT::i32, NewMode, Four);
-    SDValue IndexVal = DAG.getNode(ISD::SELECT, SL, MVT::i32, IsStandardValue,
-                                   NewMode, OffsetEnum);
+    if (UseReducedTable) {
+      // Truncate to the low 32-bits.
+      SDValue BitTable = DAG.getConstant(
+        AMDGPU::FltRoundToHWConversionTable & 0xffff, SL, MVT::i32);
 
-    SDValue Two = DAG.getConstant(2, SL, MVT::i32);
-    SDValue RoundModeTimesNumBits =
-      DAG.getNode(ISD::SHL, SL, MVT::i32, IndexVal, Two);
+      SDValue Two = DAG.getConstant(2, SL, MVT::i32);
+      SDValue RoundModeTimesNumBits =
+        DAG.getNode(ISD::SHL, SL, MVT::i32, NewMode, Two);
 
-    SDValue TableValue =
-      DAG.getNode(ISD::SRL, SL, MVT::i64, BitTable, RoundModeTimesNumBits);
-    SDValue TruncTable = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, TableValue);
+      SDValue TableValue =
+        DAG.getNode(ISD::SRL, SL, MVT::i32, BitTable, RoundModeTimesNumBits);
+      NewMode = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, TableValue);
----------------
jayfoad wrote:

Don't need this? It's already i32.

https://github.com/llvm/llvm-project/pull/88588