[llvm] AMDGPU: Don't fold clamp/omod modifiers without nofpexcept (PR #95950)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 18 09:26:08 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Matt Arsenault (arsenm)
<details>
<summary>Changes</summary>
---
Full diff: https://github.com/llvm/llvm-project/pull/95950.diff
3 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/SIFoldOperands.cpp (+12-1)
- (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.cpp (+8-4)
- (modified) llvm/test/CodeGen/AMDGPU/clamp-omod-special-case.mir (+88)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 5c411a0955878..7bf6a635158eb 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -1519,6 +1519,9 @@ const MachineOperand *SIFoldOperands::isClamp(const MachineInstr &MI) const {
case AMDGPU::V_MAX_F64_e64:
case AMDGPU::V_MAX_NUM_F64_e64:
case AMDGPU::V_PK_MAX_F16: {
+ if (MI.mayRaiseFPException())
+ return nullptr;
+
if (!TII->getNamedOperand(MI, AMDGPU::OpName::clamp)->getImm())
return nullptr;
@@ -1565,6 +1568,9 @@ bool SIFoldOperands::tryFoldClamp(MachineInstr &MI) {
if (TII->getClampMask(*Def) != TII->getClampMask(MI))
return false;
+ if (Def->mayRaiseFPException())
+ return false;
+
MachineOperand *DefClamp = TII->getNamedOperand(*Def, AMDGPU::OpName::clamp);
if (!DefClamp)
return false;
@@ -1650,7 +1656,9 @@ SIFoldOperands::isOMod(const MachineInstr &MI) const {
((Op == AMDGPU::V_MUL_F64_e64 || Op == AMDGPU::V_MUL_F64_pseudo_e64 ||
Op == AMDGPU::V_MUL_F16_e64 || Op == AMDGPU::V_MUL_F16_t16_e64 ||
Op == AMDGPU::V_MUL_F16_fake16_e64) &&
- MFI->getMode().FP64FP16Denormals.Output != DenormalMode::PreserveSign))
+ MFI->getMode().FP64FP16Denormals.Output !=
+ DenormalMode::PreserveSign) ||
+ MI.mayRaiseFPException())
return std::pair(nullptr, SIOutMods::NONE);
const MachineOperand *RegOp = nullptr;
@@ -1725,6 +1733,9 @@ bool SIFoldOperands::tryFoldOMod(MachineInstr &MI) {
if (!DefOMod || DefOMod->getImm() != SIOutMods::NONE)
return false;
+ if (Def->mayRaiseFPException())
+ return false;
+
// Clamp is applied after omod. If the source already has clamp set, don't
// fold it.
if (TII->hasModifiersSet(*Def, AMDGPU::OpName::clamp))
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 30c27b6439fc0..cc1b9ac0c9ecd 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3972,7 +3972,8 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
.add(*Dst)
.add(*Src0)
.add(*Src1)
- .addImm(Imm);
+ .addImm(Imm)
+ .setMIFlags(MI.getFlags());
updateLiveVariables(LV, MI, *MIB);
if (LIS)
LIS->ReplaceMachineInstrInMaps(MI, *MIB);
@@ -3991,7 +3992,8 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
.add(*Dst)
.add(*Src0)
.addImm(Imm)
- .add(*Src2);
+ .add(*Src2)
+ .setMIFlags(MI.getFlags());
updateLiveVariables(LV, MI, *MIB);
if (LIS)
LIS->ReplaceMachineInstrInMaps(MI, *MIB);
@@ -4012,7 +4014,8 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
.add(*Dst)
.add(*Src1)
.addImm(Imm)
- .add(*Src2);
+ .add(*Src2)
+ .setMIFlags(MI.getFlags());
updateLiveVariables(LV, MI, *MIB);
if (LIS)
LIS->ReplaceMachineInstrInMaps(MI, *MIB);
@@ -4048,7 +4051,8 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
.addImm(Src2Mods ? Src2Mods->getImm() : 0)
.add(*Src2)
.addImm(Clamp ? Clamp->getImm() : 0)
- .addImm(Omod ? Omod->getImm() : 0);
+ .addImm(Omod ? Omod->getImm() : 0)
+ .setMIFlags(MI.getFlags());
if (AMDGPU::hasNamedOperand(NewOpc, AMDGPU::OpName::op_sel))
MIB.addImm(OpSel ? OpSel->getImm() : 0);
updateLiveVariables(LV, MI, *MIB);
diff --git a/llvm/test/CodeGen/AMDGPU/clamp-omod-special-case.mir b/llvm/test/CodeGen/AMDGPU/clamp-omod-special-case.mir
index d1ba62f1d87f6..761dd162df609 100644
--- a/llvm/test/CodeGen/AMDGPU/clamp-omod-special-case.mir
+++ b/llvm/test/CodeGen/AMDGPU/clamp-omod-special-case.mir
@@ -410,3 +410,91 @@ body: |
%1 = V_MAX_F32_e64 0, killed %0, 0, 1056964608, 1, 0, implicit $mode, implicit $exec
...
+
+---
+# GCN-LABEL: name: clamp_missing_nofpexcept_0
+# GCN: %2:vgpr_32 = nofpexcept V_ADD_F32_e64 0, %0, 0, %1, 0, 0, implicit $mode, implicit $exec
+# GCN-NEXT: %3:vgpr_32 = V_MAX_F32_e64 0, killed %2, 0, killed %2, 1, 0, implicit $mode, implicit $exec
+name: clamp_missing_nofpexcept_0
+tracksRegLiveness: true
+machineFunctionInfo:
+ mode:
+ ieee: false
+ fp32-input-denormals: false
+ fp32-output-denormals: false
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+
+ %0:vgpr_32 = COPY $vgpr0
+ %1:vgpr_32 = COPY $vgpr1
+ %2:vgpr_32 = nofpexcept V_ADD_F32_e64 0, %0, 0, %1, 0, 0, implicit $mode, implicit $exec
+ %3:vgpr_32 = V_MAX_F32_e64 0, killed %2, 0, killed %2, 1, 0, implicit $mode, implicit $exec
+...
+
+---
+# GCN-LABEL: name: clamp_missing_nofpexcept_1
+# GCN: %2:vgpr_32 = V_ADD_F32_e64 0, %0, 0, %1, 0, 0, implicit $mode, implicit $exec
+# GCN-NEXT: %3:vgpr_32 = nofpexcept V_MAX_F32_e64 0, killed %2, 0, killed %2, 1, 0, implicit $mode, implicit $exec
+name: clamp_missing_nofpexcept_1
+tracksRegLiveness: true
+machineFunctionInfo:
+ mode:
+ ieee: false
+ fp32-input-denormals: false
+ fp32-output-denormals: false
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+
+ %0:vgpr_32 = COPY $vgpr0
+ %1:vgpr_32 = COPY $vgpr1
+ %2:vgpr_32 = V_ADD_F32_e64 0, %0, 0, %1, 0, 0, implicit $mode, implicit $exec
+ %3:vgpr_32 = nofpexcept V_MAX_F32_e64 0, killed %2, 0, killed %2, 1, 0, implicit $mode, implicit $exec
+...
+
+---
+# GCN-LABEL: name: omod_missing_nofpexcept_0
+# GCN: %2:vgpr_32 = nofpexcept V_ADD_F32_e64 0, %0, 0, %1, 0, 0, implicit $mode, implicit $exec
+# GCN-NEXT: %3:vgpr_32 = nsz V_MUL_F32_e64 0, killed %2, 0, 1056964608, 0, 0, implicit $mode, implicit $exec
+name: omod_missing_nofpexcept_0
+tracksRegLiveness: true
+machineFunctionInfo:
+ mode:
+ ieee: false
+ fp32-input-denormals: false
+ fp32-output-denormals: false
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+
+ %0:vgpr_32 = COPY $vgpr0
+ %1:vgpr_32 = COPY $vgpr1
+ %2:vgpr_32 = nofpexcept V_ADD_F32_e64 0, %0, 0, %1, 0, 0, implicit $mode, implicit $exec
+ %3:vgpr_32 = nsz V_MUL_F32_e64 0, killed %2, 0, 1056964608, 0, 0, implicit $mode, implicit $exec
+
+...
+
+---
+# GCN-LABEL: name: omod_missing_nofpexcept_1
+# GCN: %2:vgpr_32 = V_ADD_F32_e64 0, %0, 0, %1, 0, 0, implicit $mode, implicit $exec
+# GCN-NEXT: %3:vgpr_32 = nsz nofpexcept V_MUL_F32_e64 0, killed %2, 0, 1056964608, 0, 0, implicit $mode, implicit $exec
+name: omod_missing_nofpexcept_1
+tracksRegLiveness: true
+machineFunctionInfo:
+ mode:
+ ieee: false
+ fp32-input-denormals: false
+ fp32-output-denormals: false
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+
+ %0:vgpr_32 = COPY $vgpr0
+ %1:vgpr_32 = COPY $vgpr1
+ %2:vgpr_32 = V_ADD_F32_e64 0, %0, 0, %1, 0, 0, implicit $mode, implicit $exec
+ %3:vgpr_32 = nsz nofpexcept V_MUL_F32_e64 0, killed %2, 0, 1056964608, 0, 0, implicit $mode, implicit $exec
+
+...
``````````
</details>
https://github.com/llvm/llvm-project/pull/95950
More information about the llvm-commits mailing list