[llvm] AMDGPU: Implement llvm.set.rounding (PR #88587)
Jay Foad via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 18 06:55:03 PDT 2024
================
@@ -4056,6 +4057,75 @@ SDValue SITargetLowering::lowerGET_ROUNDING(SDValue Op,
return DAG.getMergeValues({Result, GetReg.getValue(1)}, SL);
}
+SDValue SITargetLowering::lowerSET_ROUNDING(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc SL(Op);
+
+ SDValue NewMode = Op.getOperand(1);
+ assert(NewMode.getValueType() == MVT::i32);
+
+ // Index a table of 4-bit entries mapping from the C FLT_ROUNDS values to the
+ // hardware MODE.fp_round values.
+ if (auto *ConstMode = dyn_cast<ConstantSDNode>(NewMode)) {
+ uint32_t ClampedVal = std::min(
+ static_cast<uint32_t>(ConstMode->getZExtValue()),
+ static_cast<uint32_t>(AMDGPU::TowardZeroF32_TowardNegativeF64));
+ NewMode = DAG.getConstant(
+ AMDGPU::decodeFltRoundToHWConversionTable(ClampedVal), SL, MVT::i32);
+ } else {
+ SDValue BitTable =
+ DAG.getConstant(AMDGPU::FltRoundToHWConversionTable, SL, MVT::i64);
+
+ // The supported standard values are 0-3. The extended values start at 8. We
+ // need to offset by 4 if the value is in the extended range.
+
+ // is_standard = value < 4;
+ // table_index = is_standard ? value : (value - 4)
+ // MODE.fp_round = (bit_table >> table_index) & 0xf
----------------
jayfoad wrote:
```suggestion
// MODE.fp_round = (bit_table >> (table_index << 2)) & 0xf
```
https://github.com/llvm/llvm-project/pull/88587
More information about the llvm-commits mailing list