[llvm] 5660bb6 - AMDGPU: Remove denormal subtarget features
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 2 14:17:25 PDT 2020
Author: Matt Arsenault
Date: 2020-04-02T17:17:12-04:00
New Revision: 5660bb6bc9ac5ed910d95210e43ed437f155212d
URL: https://github.com/llvm/llvm-project/commit/5660bb6bc9ac5ed910d95210e43ed437f155212d
DIFF: https://github.com/llvm/llvm-project/commit/5660bb6bc9ac5ed910d95210e43ed437f155212d.diff
LOG: AMDGPU: Remove denormal subtarget features
Switch to using the denormal-fp-math/denormal-fp-math-f32 attributes.
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPU.td
llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
llvm/lib/Target/AMDGPU/AMDGPUFeatures.td
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
llvm/lib/Target/AMDGPU/R600Instructions.td
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmad.ftz.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
llvm/test/CodeGen/AMDGPU/amdgcn-ieee.ll
llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll
llvm/test/CodeGen/AMDGPU/clamp-modifier.ll
llvm/test/CodeGen/AMDGPU/clamp.ll
llvm/test/CodeGen/AMDGPU/debug-value.ll
llvm/test/CodeGen/AMDGPU/default-fp-mode.ll
llvm/test/CodeGen/AMDGPU/fadd-fma-fmul-combine.ll
llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
llvm/test/CodeGen/AMDGPU/fcanonicalize.ll
llvm/test/CodeGen/AMDGPU/fdiv.f16.ll
llvm/test/CodeGen/AMDGPU/fdiv.ll
llvm/test/CodeGen/AMDGPU/fdiv32-to-rcp-folding.ll
llvm/test/CodeGen/AMDGPU/fdot2.ll
llvm/test/CodeGen/AMDGPU/fma-combine.ll
llvm/test/CodeGen/AMDGPU/fmaxnum.ll
llvm/test/CodeGen/AMDGPU/fminnum.ll
llvm/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll
llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll
llvm/test/CodeGen/AMDGPU/fmuladd.f32.ll
llvm/test/CodeGen/AMDGPU/fmuladd.v2f16.ll
llvm/test/CodeGen/AMDGPU/fneg-combines.ll
llvm/test/CodeGen/AMDGPU/fpext-free.ll
llvm/test/CodeGen/AMDGPU/frem.ll
llvm/test/CodeGen/AMDGPU/hsa-fp-mode.ll
llvm/test/CodeGen/AMDGPU/known-never-snan.ll
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmad.ftz.f16.ll
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmad.ftz.ll
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll
llvm/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll
llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll
llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll
llvm/test/CodeGen/AMDGPU/mad-combine.ll
llvm/test/CodeGen/AMDGPU/mad-mix-hi.ll
llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll
llvm/test/CodeGen/AMDGPU/mad-mix.ll
llvm/test/CodeGen/AMDGPU/madak.ll
llvm/test/CodeGen/AMDGPU/madmk.ll
llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll
llvm/test/CodeGen/AMDGPU/omod.ll
llvm/test/CodeGen/AMDGPU/operand-folding.ll
llvm/test/CodeGen/AMDGPU/rcp-pattern.ll
llvm/test/CodeGen/AMDGPU/rcp_iflag.ll
llvm/test/CodeGen/AMDGPU/rsq.ll
llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir
llvm/test/CodeGen/AMDGPU/sdwa-peephole.ll
llvm/test/CodeGen/AMDGPU/udiv.ll
llvm/test/CodeGen/AMDGPU/udivrem24.ll
llvm/test/CodeGen/AMDGPU/v_mac.ll
llvm/test/CodeGen/AMDGPU/v_mac_f16.ll
llvm/test/CodeGen/AMDGPU/v_madak_f16.ll
llvm/test/Transforms/Inline/AMDGPU/inline-target-cpu.ll
llvm/test/tools/llvm-objdump/ELF/AMDGPU/source-lines.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index c9db0a603223..1f1065749c35 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -494,30 +494,6 @@ def FeatureNoDataDepHazard : SubtargetFeature<"no-data-dep-hazard",
// Subtarget Features (options and debugging)
//===------------------------------------------------------------===//
-// Denormal handling for fp64 and fp16 is controlled by the same
-// config register when fp16 supported.
-// TODO: Do we need a separate f16 setting when not legal?
-def FeatureFP64FP16Denormals : SubtargetFeature<"fp64-fp16-denormals",
- "FP64FP16Denormals",
- "true",
- "Enable double and half precision denormal handling",
- [FeatureFP64]
->;
-
-def FeatureFP64Denormals : SubtargetFeature<"fp64-denormals",
- "FP64FP16Denormals",
- "true",
- "Enable double and half precision denormal handling",
- [FeatureFP64, FeatureFP64FP16Denormals]
->;
-
-def FeatureFP16Denormals : SubtargetFeature<"fp16-denormals",
- "FP64FP16Denormals",
- "true",
- "Enable half precision denormal handling",
- [FeatureFP64FP16Denormals]
->;
-
def FeatureFPExceptions : SubtargetFeature<"fp-exceptions",
"FPExceptions",
"true",
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index 60fab532c88a..da17d94f3265 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -15,6 +15,7 @@
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
+#include "llvm/ADT/FloatingPointMode.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ConstantFolding.h"
@@ -1387,7 +1388,9 @@ bool AMDGPUCodeGenPrepare::runOnFunction(Function &F) {
DT = DTWP ? &DTWP->getDomTree() : nullptr;
HasUnsafeFPMath = hasUnsafeFPMath(F);
- HasFP32Denormals = ST->hasFP32Denormals(F);
+
+ AMDGPU::SIModeRegisterDefaults Mode(F);
+ HasFP32Denormals = Mode.allFP32Denormals();
bool MadeChange = false;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUFeatures.td b/llvm/lib/Target/AMDGPU/AMDGPUFeatures.td
index ea3952c316e4..6ca896c6de08 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUFeatures.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUFeatures.td
@@ -18,15 +18,6 @@ def FeatureFMA : SubtargetFeature<"fmaf",
"Enable single precision FMA (not as fast as mul+add, but fused)"
>;
-// Some instructions do not support denormals despite this flag. Using
-// fp32 denormals also causes instructions to run at the double
-// precision rate for the device.
-def FeatureFP32Denormals : SubtargetFeature<"fp32-denormals",
- "FP32Denormals",
- "true",
- "Enable single precision denormal handling"
->;
-
class SubtargetFeatureLocalMemorySize <int Value> : SubtargetFeature<
"localmemorysize"#Value,
"LocalMemorySize",
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 09e18cc0c2fd..97d724a2f47b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -402,7 +402,7 @@ bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
}
#endif
Subtarget = &MF.getSubtarget<GCNSubtarget>();
- Mode = AMDGPU::SIModeRegisterDefaults(MF.getFunction(), *Subtarget);
+ Mode = AMDGPU::SIModeRegisterDefaults(MF.getFunction());
return SelectionDAGISel::runOnMachineFunction(MF);
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index bdf5c3b03663..51f4132b1096 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -1644,9 +1644,10 @@ SDValue AMDGPUTargetLowering::LowerDIVREM24(SDValue Op, SelectionDAG &DAG,
const AMDGPUMachineFunction *MFI = MF.getInfo<AMDGPUMachineFunction>();
// float fr = mad(fqneg, fb, fa);
- unsigned OpCode = MFI->getMode().allFP32Denormals() ?
- (unsigned)AMDGPUISD::FMAD_FTZ :
- (unsigned)ISD::FMAD;
+ unsigned OpCode = !MFI->getMode().allFP32Denormals() ?
+ (unsigned)ISD::FMAD :
+ (unsigned)AMDGPUISD::FMAD_FTZ;
+
SDValue fr = DAG.getNode(OpCode, DL, FltVT, fqneg, fb, fa);
// int iq = (int)fq;
@@ -1729,9 +1730,10 @@ void AMDGPUTargetLowering::LowerUDIVREM64(SDValue Op,
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
// Compute denominator reciprocal.
- unsigned FMAD = MFI->getMode().allFP32Denormals() ?
- (unsigned)AMDGPUISD::FMAD_FTZ :
- (unsigned)ISD::FMAD;
+ unsigned FMAD = !MFI->getMode().allFP32Denormals() ?
+ (unsigned)ISD::FMAD :
+ (unsigned)AMDGPUISD::FMAD_FTZ;
+
SDValue Cvt_Lo = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, RHS_Lo);
SDValue Cvt_Hi = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, RHS_Hi);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index dedbe15ab029..483b1568e532 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -807,3 +807,8 @@ def fmaxnum_like_oneuse : PatFrags<(ops node:$src0, node:$src1),
[(fmaxnum_ieee_oneuse node:$src0, node:$src1),
(fmaxnum_oneuse node:$src0, node:$src1)]
>;
+
+def any_fmad : PatFrags<(ops node:$src0, node:$src1, node:$src2),
+ [(fmad node:$src0, node:$src1, node:$src2),
+ (AMDGPUfmad_ftz node:$src0, node:$src1, node:$src2)]
+>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
index 940ddff85d73..6ce22a3f78bd 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
@@ -18,7 +18,7 @@ AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) :
LocalMemoryObjects(),
ExplicitKernArgSize(0),
LDSSize(0),
- Mode(MF.getFunction(), MF.getSubtarget<GCNSubtarget>()),
+ Mode(MF.getFunction()),
IsEntryFunction(AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv())),
NoSignedZerosFPMath(MF.getTarget().Options.NoSignedZerosFPMath),
MemoryBound(false),
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index ad106d4efd16..5d58b8239ca0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -59,13 +59,6 @@ R600Subtarget::initializeSubtargetDependencies(const Triple &TT,
FullFS += FS;
ParseSubtargetFeatures(GPU, FullFS);
- // FIXME: I don't think think Evergreen has any useful support for
- // denormals, but should be checked. Should we issue a warning somewhere
- // if someone tries to enable these?
- if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
- FP32Denormals = false;
- }
-
HasMulU24 = getGeneration() >= EVERGREEN;
HasMulI24 = hasCaymanISA();
@@ -76,9 +69,6 @@ GCNSubtarget &
GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
StringRef GPU, StringRef FS) {
// Determine default and user-specified characteristics
- // On SI+, we want FP64 denormals to be on by default. FP32 denormals can be
- // enabled, but some instructions do not respect them and they run at the
- // double precision rate, so don't enable by default.
//
// We want to be able to turn these off, but making this a subtarget feature
// for SI has the unhelpful behavior that it unsets everything else if you
@@ -93,15 +83,6 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA.
FullFS += "+flat-for-global,+unaligned-buffer-access,+trap-handler,";
- // FIXME: I don't think think Evergreen has any useful support for
- // denormals, but should be checked. Should we issue a warning somewhere
- // if someone tries to enable these?
- if (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
- FullFS += "+fp64-fp16-denormals,+fp32-denormals,";
- } else {
- FullFS += "-fp32-denormals,";
- }
-
FullFS += "+enable-prt-strict-null,"; // This is overridden by a disable in FS
// Disable mutually exclusive bits.
@@ -172,7 +153,6 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT) :
TargetTriple(TT),
Has16BitInsts(false),
HasMadMixInsts(false),
- FP32Denormals(false),
FPExceptions(false),
HasSDWA(false),
HasVOP3PInsts(false),
@@ -200,7 +180,6 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
FastFMAF32(false),
HalfRate64Ops(false),
- FP64FP16Denormals(false),
FlatForGlobal(false),
AutoWaitcntBeforeBarrier(false),
CodeObjectV3(false),
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index b200c221ddc6..c565c17ff03d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -66,7 +66,6 @@ class AMDGPUSubtarget {
protected:
bool Has16BitInsts;
bool HasMadMixInsts;
- bool FP32Denormals;
bool FPExceptions;
bool HasSDWA;
bool HasVOP3PInsts;
@@ -149,15 +148,6 @@ class AMDGPUSubtarget {
return HasMadMixInsts;
}
- bool hasFP32Denormals(const Function &F) const {
- // FIXME: This should not be a property of the subtarget. This should be a
- // property with a default set by the calling convention which can be
- // overridden by attributes. For now, use the subtarget feature as a
- // placeholder attribute. The function arguments only purpose is to
- // discourage use without a function context until this is removed.
- return FP32Denormals;
- }
-
bool hasFPExceptions() const {
return FPExceptions;
}
@@ -304,7 +294,6 @@ class GCNSubtarget : public AMDGPUGenSubtargetInfo,
bool HalfRate64Ops;
// Dynamially set bits that enable features.
- bool FP64FP16Denormals;
bool FlatForGlobal;
bool AutoWaitcntBeforeBarrier;
bool CodeObjectV3;
@@ -636,20 +625,6 @@ class GCNSubtarget : public AMDGPUGenSubtargetInfo,
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
const Function &) const;
- /// Alias for hasFP64FP16Denormals
- bool hasFP16Denormals(const Function &F) const {
- return FP64FP16Denormals;
- }
-
- /// Alias for hasFP64FP16Denormals
- bool hasFP64Denormals(const Function &F) const {
- return FP64FP16Denormals;
- }
-
- bool hasFP64FP16Denormals(const Function &F) const {
- return FP64FP16Denormals;
- }
-
bool supportsMinMaxDenormModes() const {
return getGeneration() >= AMDGPUSubtarget::GFX9;
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 0896af063028..dd8a08baa8b4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -941,8 +941,8 @@ bool GCNTTIImpl::areInlineCompatible(const Function *Caller,
// FIXME: dx10_clamp can just take the caller setting, but there seems to be
// no way to support merge for backend defined attributes.
- AMDGPU::SIModeRegisterDefaults CallerMode(*Caller, *CallerST);
- AMDGPU::SIModeRegisterDefaults CalleeMode(*Callee, *CalleeST);
+ AMDGPU::SIModeRegisterDefaults CallerMode(*Caller);
+ AMDGPU::SIModeRegisterDefaults CalleeMode(*Callee);
return CallerMode.isInlineCompatible(CalleeMode);
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
index 70001b22cb90..1313e2f8f576 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -133,7 +133,7 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
TLI(ST->getTargetLowering()),
CommonTTI(TM, F),
IsGraphicsShader(AMDGPU::isShader(F.getCallingConv())),
- HasFP32Denormals(ST->hasFP32Denormals(F)) { }
+ HasFP32Denormals(AMDGPU::SIModeRegisterDefaults(F).allFP32Denormals()) {}
bool hasBranchDivergence() { return true; }
bool useGPUDivergenceAnalysis() const;
diff --git a/llvm/lib/Target/AMDGPU/R600Instructions.td b/llvm/lib/Target/AMDGPU/R600Instructions.td
index 869c183e2245..2cc21364c439 100644
--- a/llvm/lib/Target/AMDGPU/R600Instructions.td
+++ b/llvm/lib/Target/AMDGPU/R600Instructions.td
@@ -1006,7 +1006,7 @@ class MULADD_Common <bits<5> inst> : R600_3OP <
class MULADD_IEEE_Common <bits<5> inst> : R600_3OP <
inst, "MULADD_IEEE",
- [(set f32:$dst, (fmad f32:$src0, f32:$src1, f32:$src2))]
+ [(set f32:$dst, (any_fmad f32:$src0, f32:$src1, f32:$src2))]
>;
class FMA_Common <bits<5> inst> : R600_3OP <
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 2c9b32763428..daf92b030282 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -1333,8 +1333,7 @@ bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
return true;
}
-SIModeRegisterDefaults::SIModeRegisterDefaults(const Function &F,
- const GCNSubtarget &ST) {
+SIModeRegisterDefaults::SIModeRegisterDefaults(const Function &F) {
*this = getDefaultForCallingConv(F.getCallingConv());
StringRef IEEEAttr = F.getFnAttribute("amdgpu-ieee").getValueAsString();
@@ -1346,11 +1345,25 @@ SIModeRegisterDefaults::SIModeRegisterDefaults(const Function &F,
if (!DX10ClampAttr.empty())
DX10Clamp = DX10ClampAttr == "true";
- // FIXME: Split this when denormal-fp-math is used
- FP32InputDenormals = ST.hasFP32Denormals(F);
- FP32OutputDenormals = FP32InputDenormals;
- FP64FP16InputDenormals = ST.hasFP64FP16Denormals(F);
- FP64FP16OutputDenormals = FP64FP16InputDenormals;
+ StringRef DenormF32Attr = F.getFnAttribute("denormal-fp-math-f32").getValueAsString();
+ if (!DenormF32Attr.empty()) {
+ DenormalMode DenormMode = parseDenormalFPAttribute(DenormF32Attr);
+ FP32InputDenormals = DenormMode.Input == DenormalMode::IEEE;
+ FP32OutputDenormals = DenormMode.Output == DenormalMode::IEEE;
+ }
+
+ StringRef DenormAttr = F.getFnAttribute("denormal-fp-math").getValueAsString();
+ if (!DenormAttr.empty()) {
+ DenormalMode DenormMode = parseDenormalFPAttribute(DenormAttr);
+
+ if (DenormF32Attr.empty()) {
+ FP32InputDenormals = DenormMode.Input == DenormalMode::IEEE;
+ FP32OutputDenormals = DenormMode.Output == DenormalMode::IEEE;
+ }
+
+ FP64FP16InputDenormals = DenormMode.Input == DenormalMode::IEEE;
+ FP64FP16OutputDenormals = DenormMode.Output == DenormalMode::IEEE;
+ }
}
namespace {
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index de8eb7439926..20bf0ab46aee 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -695,19 +695,13 @@ struct SIModeRegisterDefaults {
FP64FP16InputDenormals(true),
FP64FP16OutputDenormals(true) {}
- // FIXME: Should not depend on the subtarget
- SIModeRegisterDefaults(const Function &F, const GCNSubtarget &ST);
+ SIModeRegisterDefaults(const Function &F);
static SIModeRegisterDefaults getDefaultForCallingConv(CallingConv::ID CC) {
const bool IsCompute = AMDGPU::isCompute(CC);
SIModeRegisterDefaults Mode;
- Mode.DX10Clamp = true;
Mode.IEEE = IsCompute;
- Mode.FP32InputDenormals = true;
- Mode.FP32OutputDenormals = true;
- Mode.FP64FP16InputDenormals = true;
- Mode.FP64FP16OutputDenormals = true;
return Mode;
}
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll b/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll
index 6986a3158461..e898f0d2c461 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll
@@ -278,5 +278,5 @@ define amdgpu_kernel void @rcp_v2f16_f32_ftzdaz(<2 x half> addrspace(1)* %out, <
ret void
}
-attributes #0 = { nounwind "target-features"="+fp32-denormals" }
-attributes #1 = { nounwind "target-features"="-fp32-denormals" }
+attributes #0 = { nounwind "denormal-fp-math-f32"="ieee,ieee" }
+attributes #1 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmad.ftz.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmad.ftz.mir
index 05011091a669..b9f88557678a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmad.ftz.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmad.ftz.mir
@@ -1,8 +1,8 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -march=amdgcn -mcpu=tahiti -mattr=+fp32-denormals -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
-# RUN: llc -march=amdgcn -mcpu=tahiti -mattr=-fp32-denormals -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
-# RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+fp32-denormals -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
-# RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-fp32-denormals -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
+# RUN: llc -march=amdgcn -mcpu=tahiti -denormal-fp-math-f32=ieee -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
+# RUN: llc -march=amdgcn -mcpu=tahiti -denormal-fp-math-f32=preserve-sign -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -denormal-fp-math-f32=ieee -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -denormal-fp-math-f32=preserve-sign -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
---
name: fmad_ftz_s32_vvvv
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
index 2799d84e2464..51cc92525174 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdhsa -mattr=-fp32-denormals -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=CHECK,GISEL %s
-; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdhsa -mattr=-fp32-denormals -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=CHECK,CGP %s
+; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdhsa -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=CHECK,GISEL %s
+; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdhsa -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=CHECK,CGP %s
; The same 32-bit expansion is implemented in the legalizer and in AMDGPUCodeGenPrepare.
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
index 560c2dead559..78f03210fb88 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdhsa -mattr=-fp32-denormals -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=CHECK,GISEL %s
-; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdhsa -mattr=-fp32-denormals -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=CHECK,CGP %s
+; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdhsa -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=CHECK,GISEL %s
+; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdhsa -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=CHECK,CGP %s
; The same 32-bit expansion is implemented in the legalizer and in AMDGPUCodeGenPrepare.
diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn-ieee.ll b/llvm/test/CodeGen/AMDGPU/amdgcn-ieee.ll
index 800ae81c303f..2ad2ee3502d8 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgcn-ieee.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgcn-ieee.ll
@@ -150,7 +150,6 @@ define amdgpu_ps void @ps_ieee_mode_default() #0 {
ret void
}
-; FIXME: Should have denormals off by default.
; GCN-LABEL: {{^}}ps_ieee_mode_on:
; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]]
; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]]
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll
index 124a241a24ab..74228ce7f344 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll
@@ -338,8 +338,8 @@ define amdgpu_kernel void @fdiv_fpmath_f32_denormals(float addrspace(1)* %out, f
}
attributes #0 = { nounwind optnone noinline }
-attributes #1 = { nounwind "target-features"="-fp32-denormals" }
-attributes #2 = { nounwind "target-features"="+fp32-denormals" }
+attributes #1 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
+attributes #2 = { nounwind "denormal-fp-math-f32"="ieee,ieee" }
!0 = !{float 2.500000e+00}
!1 = !{float 5.000000e-01}
diff --git a/llvm/test/CodeGen/AMDGPU/clamp-modifier.ll b/llvm/test/CodeGen/AMDGPU/clamp-modifier.ll
index 836a3b5e2dbe..5a56a1a264af 100644
--- a/llvm/test/CodeGen/AMDGPU/clamp-modifier.ll
+++ b/llvm/test/CodeGen/AMDGPU/clamp-modifier.ll
@@ -389,10 +389,10 @@ declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1
declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
-attributes #0 = { nounwind "target-features"="-fp32-denormals" }
+attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind "target-features"="+fp32-denormals" }
-attributes #3 = { nounwind "target-features"="-fp64-fp16-denormals" }
+attributes #2 = { nounwind "denormal-fp-math-f32"="ieee,ieee" }
+attributes #3 = { nounwind "denormal-fp-math-f32"="ieee,ieee" "denormal-fp-math"="preserve-sign,preserve-sign" }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!2, !3}
diff --git a/llvm/test/CodeGen/AMDGPU/clamp.ll b/llvm/test/CodeGen/AMDGPU/clamp.ll
index c3757c4ad679..df48096a368f 100644
--- a/llvm/test/CodeGen/AMDGPU/clamp.ll
+++ b/llvm/test/CodeGen/AMDGPU/clamp.ll
@@ -767,8 +767,8 @@ declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #1
declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>) #1
declare <2 x half> @llvm.maxnum.v2f16(<2 x half>, <2 x half>) #1
-attributes #0 = { nounwind "target-features"="-fp32-denormals" }
+attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind "amdgpu-dx10-clamp"="false" "target-features"="-fp32-denormals,-fp-exceptions" "no-nans-fp-math"="false" }
-attributes #3 = { nounwind "amdgpu-dx10-clamp"="true" "target-features"="-fp32-denormals,+fp-exceptions" "no-nans-fp-math"="false" }
-attributes #4 = { nounwind "amdgpu-dx10-clamp"="false" "target-features"="-fp32-denormals,+fp-exceptions" "no-nans-fp-math"="false" }
+attributes #2 = { nounwind "amdgpu-dx10-clamp"="false" "target-features"="-fp-exceptions" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-nans-fp-math"="false" }
+attributes #3 = { nounwind "amdgpu-dx10-clamp"="true" "target-features"="+fp-exceptions" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-nans-fp-math"="false" }
+attributes #4 = { nounwind "amdgpu-dx10-clamp"="false" "target-features"="+fp-exceptions" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-nans-fp-math"="false" }
diff --git a/llvm/test/CodeGen/AMDGPU/debug-value.ll b/llvm/test/CodeGen/AMDGPU/debug-value.ll
index 0eb639e06e6a..0eff1df7621b 100644
--- a/llvm/test/CodeGen/AMDGPU/debug-value.ll
+++ b/llvm/test/CodeGen/AMDGPU/debug-value.ll
@@ -89,7 +89,7 @@ declare float @barney() #2
declare void @eggs(float) #2
declare void @llvm.dbg.value(metadata, metadata, metadata) #1
-attributes #0 = { convergent nounwind "target-cpu"="gfx900" "target-features"="+fp32-denormals" }
+attributes #0 = { convergent nounwind "target-cpu"="gfx900" }
attributes #1 = { nounwind readnone speculatable }
attributes #2 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/default-fp-mode.ll b/llvm/test/CodeGen/AMDGPU/default-fp-mode.ll
index c37c2792de7f..62576eeea38b 100644
--- a/llvm/test/CodeGen/AMDGPU/default-fp-mode.ll
+++ b/llvm/test/CodeGen/AMDGPU/default-fp-mode.ll
@@ -82,7 +82,15 @@ define amdgpu_kernel void @test_f32_f16_f64_denormals(half addrspace(1)* %out0,
ret void
}
-; FIXME: Denormals should be off by default
+; GCN-LABEL: {{^}}test_just_f32_attr_flush
+; GCN: FloatMode: 192
+; GCN: IeeeMode: 1
+define amdgpu_kernel void @test_just_f32_attr_flush(float addrspace(1)* %out0, double addrspace(1)* %out1) #9 {
+ store float 0.0, float addrspace(1)* %out0
+ store double 0.0, double addrspace(1)* %out1
+ ret void
+}
+
; GCN-LABEL: {{^}}kill_gs_const:
; GCN: FloatMode: 240
; GCN: IeeeMode: 0
@@ -110,10 +118,11 @@ declare void @llvm.amdgcn.kill(i1)
attributes #0 = { nounwind "target-cpu"="tahiti" }
attributes #1 = { nounwind "target-cpu"="fiji" }
-attributes #2 = { nounwind "target-features"="+fp64-denormals" }
-attributes #3 = { nounwind "target-features"="+fp32-denormals" }
-attributes #4 = { nounwind "target-features"="+fp32-denormals,+fp64-denormals" }
-attributes #5 = { nounwind "target-features"="-fp32-denormals,-fp64-fp16-denormals" }
-attributes #6 = { nounwind "target-features"="+fp64-fp16-denormals" }
-attributes #7 = { nounwind "target-features"="-fp64-fp16-denormals" }
-attributes #8 = { nounwind "target-features"="+fp32-denormals,+fp64-fp16-denormals" }
+attributes #2 = { nounwind "denormal-fp-math"="ieee,ieee" }
+attributes #3 = { nounwind "denormal-fp-math-f32"="ieee,ieee" }
+attributes #4 = { nounwind "denormal-fp-math"="ieee,ieee" }
+attributes #5 = { nounwind "denormal-fp-math"="preserve-sign,preserve-sign" }
+attributes #6 = { nounwind "denormal-fp-math"="ieee,ieee" }
+attributes #7 = { nounwind "denormal-fp-math-f32"="ieee,ieee" "denormal-fp-math"="preserve-sign,preserve-sign" }
+attributes #8 = { nounwind "denormal-fp-math"="ieee,ieee" }
+attributes #9 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
diff --git a/llvm/test/CodeGen/AMDGPU/fadd-fma-fmul-combine.ll b/llvm/test/CodeGen/AMDGPU/fadd-fma-fmul-combine.ll
index 501971ea9662..2780a6dec8f0 100644
--- a/llvm/test/CodeGen/AMDGPU/fadd-fma-fmul-combine.ll
+++ b/llvm/test/CodeGen/AMDGPU/fadd-fma-fmul-combine.ll
@@ -1,8 +1,8 @@
-; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=+fast-fmaf,-fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-FLUSH %s
-; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=-fast-fmaf,-fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-FLUSH %s
+; RUN: llc -march=amdgcn -mattr=+fast-fmaf -denormal-fp-math-f32=preserve-sign -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-FLUSH %s
+; RUN: llc -march=amdgcn -mattr=-fast-fmaf -denormal-fp-math-f32=preserve-sign -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-FLUSH %s
-; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=+fast-fmaf,+fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-FASTFMA %s
-; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=-fast-fmaf,+fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-SLOWFMA %s
+; RUN: llc -march=amdgcn -mattr=+fast-fmaf -denormal-fp-math-f32=ieee -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-FASTFMA %s
+; RUN: llc -march=amdgcn -mattr=-fast-fmaf -denormal-fp-math-f32=ieee -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-SLOWFMA %s
; FIXME: This should also fold when fma is actually fast if an FMA
; exists in the original program.
diff --git a/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll b/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
index 6d44292ffbe8..3d8a1c7d7458 100644
--- a/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
+++ b/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
@@ -1,8 +1,8 @@
-; RUN: llc -march=amdgcn -mcpu=gfx801 -verify-machineinstrs -mattr=-fp32-denormals < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,VI-FLUSH,GCN-FLUSH,GCN-NOEXCEPT %s
-; RUN: llc -march=amdgcn -mcpu=gfx801 -verify-machineinstrs -mattr=-fp32-denormals,+fp-exceptions < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-EXCEPT,VI,VI-FLUSH,GCN-FLUSH %s
-; RUN: llc -march=amdgcn -mcpu=gfx801 -verify-machineinstrs -mattr=+fp32-denormals < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,VI-DENORM,GCN-DENORM,GCN-NOEXCEPT %s
-; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -mattr=+fp32-denormals < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9-DENORM,GCN-DENORM,GCN-NOEXCEPT %s
-; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -mattr=-fp32-denormals < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9-FLUSH,GCN-FLUSH,GCN-NOEXCEPT %s
+; RUN: llc -march=amdgcn -mcpu=gfx801 -verify-machineinstrs -denormal-fp-math-f32=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,VI-FLUSH,GCN-FLUSH,GCN-NOEXCEPT %s
+; RUN: llc -march=amdgcn -mcpu=gfx801 -verify-machineinstrs -mattr=+fp-exceptions -denormal-fp-math-f32=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-EXCEPT,VI,VI-FLUSH,GCN-FLUSH %s
+; RUN: llc -march=amdgcn -mcpu=gfx801 -verify-machineinstrs -denormal-fp-math-f32=ieee < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,VI-DENORM,GCN-DENORM,GCN-NOEXCEPT %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -denormal-fp-math-f32=ieee < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9-DENORM,GCN-DENORM,GCN-NOEXCEPT %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -denormal-fp-math-f32=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9-FLUSH,GCN-FLUSH,GCN-NOEXCEPT %s
; GCN-LABEL: {{^}}test_no_fold_canonicalize_loaded_value_f32:
; GCN-FLUSH: v_mul_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
@@ -907,4 +907,4 @@ declare float @llvm.amdgcn.frexp.mant.f32(float) #0
attributes #0 = { nounwind readnone }
attributes #1 = { "no-nans-fp-math"="true" }
-attributes #2 = { "target-features"="-fp64-fp16-denormals" }
+attributes #2 = { "denormal-fp-math"="preserve-sign,preserve-sign" "denormal-fp-math-f32"="ieee,ieee" }
diff --git a/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll b/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
index e1aaeee0480a..e179ef37b3f8 100644
--- a/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
@@ -739,6 +739,6 @@ define <4 x half> @v_test_canonicalize_reg_undef_reg_reg_v4f16(half %val0, half
}
attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind "target-features"="-fp32-denormals" }
-attributes #2 = { nounwind "target-features"="-fp32-denormals,-fp64-fp16-denormals" }
-attributes #3 = { nounwind "target-features"="-fp32-denormals,+fp64-fp16-denormals" }
+attributes #1 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
+attributes #2 = { nounwind "denormal-fp-math"="preserve-sign,preserve-sign" }
+attributes #3 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
diff --git a/llvm/test/CodeGen/AMDGPU/fcanonicalize.ll b/llvm/test/CodeGen/AMDGPU/fcanonicalize.ll
index f17536ef2def..ad472a92f86e 100644
--- a/llvm/test/CodeGen/AMDGPU/fcanonicalize.ll
+++ b/llvm/test/CodeGen/AMDGPU/fcanonicalize.ll
@@ -625,9 +625,9 @@ define <4 x double> @v_test_canonicalize_v4f64(<4 x double> %arg) #1 {
}
attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind "target-features"="-fp32-denormals" }
-attributes #2 = { nounwind "target-features"="-fp32-denormals,-fp64-fp16-denormals" }
-attributes #3 = { nounwind "target-features"="+fp32-denormals,+fp64-fp16-denormals" }
-attributes #4 = { nounwind "target-features"="-fp32-denormals,-fp64-fp16-denormals" "target-cpu"="tonga" }
-attributes #5 = { nounwind "target-features"="+fp32-denormals,+fp64-fp16-denormals" "target-cpu"="gfx900" }
-attributes #6 = { nounwind "target-features"="-fp32-denormals,-fp64-fp16-denormals" "target-cpu"="gfx900" }
+attributes #1 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
+attributes #2 = { nounwind "denormal-fp-math"="preserve-sign,preserve-sign" }
+attributes #3 = { nounwind "denormal-fp-math"="ieee,ieee" }
+attributes #4 = { nounwind "denormal-fp-math"="preserve-sign,preserve-sign" "target-cpu"="tonga" }
+attributes #5 = { nounwind "denormal-fp-math"="ieee,ieee" "target-cpu"="gfx900" }
+attributes #6 = { nounwind "denormal-fp-math"="preserve-sign,preserve-sign" "target-cpu"="gfx900" }
diff --git a/llvm/test/CodeGen/AMDGPU/fdiv.f16.ll b/llvm/test/CodeGen/AMDGPU/fdiv.f16.ll
index c5df485bb1bf..29a48397d94e 100644
--- a/llvm/test/CodeGen/AMDGPU/fdiv.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fdiv.f16.ll
@@ -1,8 +1,8 @@
-; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
-; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8_9_10 %s
-; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8_9_10 %s
-; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8_9_10 %s
-; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8_9_10 %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8_9_10 %s
+; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8_9_10 %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8_9_10 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-flat-for-global -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8_9_10 %s
; Make sure fdiv is promoted to f32.
@@ -263,9 +263,9 @@ define amdgpu_kernel void @div_afn_neg_k_x_pat_f16(half addrspace(1)* %out) #0 {
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare half @llvm.sqrt.f16(half) #1
-declare half @llvm.fabs.f16(half) #1
+declare i32 @llvm.amdgcn.workitem.id.x() #2
+declare half @llvm.sqrt.f16(half) #2
+declare half @llvm.fabs.f16(half) #2
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/fdiv.ll b/llvm/test/CodeGen/AMDGPU/fdiv.ll
index 74ad62159053..1986ecf20929 100644
--- a/llvm/test/CodeGen/AMDGPU/fdiv.ll
+++ b/llvm/test/CodeGen/AMDGPU/fdiv.ll
@@ -345,9 +345,8 @@ entry:
ret void
}
-
-attributes #0 = { nounwind "enable-unsafe-fp-math"="false" "target-features"="-fp32-denormals,+fp64-fp16-denormals,-flat-for-global" }
-attributes #1 = { nounwind "enable-unsafe-fp-math"="true" "target-features"="-fp32-denormals,-flat-for-global" }
-attributes #2 = { nounwind "enable-unsafe-fp-math"="false" "target-features"="+fp32-denormals,-flat-for-global" }
+attributes #0 = { nounwind "enable-unsafe-fp-math"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "target-features"="-flat-for-global" }
+attributes #1 = { nounwind "enable-unsafe-fp-math"="true" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "target-features"="-flat-for-global" }
+attributes #2 = { nounwind "enable-unsafe-fp-math"="false" "denormal-fp-math-f32"="ieee,ieee" "target-features"="-flat-for-global" }
!0 = !{float 2.500000e+00}
diff --git a/llvm/test/CodeGen/AMDGPU/fdiv32-to-rcp-folding.ll b/llvm/test/CodeGen/AMDGPU/fdiv32-to-rcp-folding.ll
index c02a21efce5f..197bdd77d254 100644
--- a/llvm/test/CodeGen/AMDGPU/fdiv32-to-rcp-folding.ll
+++ b/llvm/test/CodeGen/AMDGPU/fdiv32-to-rcp-folding.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+fp32-denormals < %s | FileCheck --check-prefixes=GCN,GCN-DENORM %s
-; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-fp32-denormals < %s | FileCheck --check-prefixes=GCN,GCN-FLUSH %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -denormal-fp-math-f32=ieee < %s | FileCheck --check-prefixes=GCN,GCN-DENORM %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -denormal-fp-math-f32=preserve-sign < %s | FileCheck --check-prefixes=GCN,GCN-FLUSH %s
; GCN-LABEL: {{^}}div_1_by_x_25ulp:
; GCN-DENORM-DAG: v_mov_b32_e32 [[L:v[0-9]+]], 0x6f800000
diff --git a/llvm/test/CodeGen/AMDGPU/fdot2.ll b/llvm/test/CodeGen/AMDGPU/fdot2.ll
index 2143fa055773..7da5dbd98716 100644
--- a/llvm/test/CodeGen/AMDGPU/fdot2.ll
+++ b/llvm/test/CodeGen/AMDGPU/fdot2.ll
@@ -1,10 +1,10 @@
-; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX900
-; RUN: llc -march=amdgcn -mcpu=gfx906 -mattr=-fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GCN-DL-UNSAFE,GFX906-DL-UNSAFE
-; RUN: llc -march=amdgcn -mcpu=gfx1011 -mattr=-fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GCN-DL-UNSAFE,GFX10-DL-UNSAFE,GFX10-CONTRACT
-; RUN: llc -march=amdgcn -mcpu=gfx1012 -mattr=-fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GCN-DL-UNSAFE,GFX10-DL-UNSAFE,GFX10-CONTRACT
-; RUN: llc -march=amdgcn -mcpu=gfx906 -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX906
-; RUN: llc -march=amdgcn -mcpu=gfx906 -mattr=-fp64-fp16-denormals,-fp32-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX906-CONTRACT
-; RUN: llc -march=amdgcn -mcpu=gfx906 -mattr=+fp64-fp16-denormals,+fp32-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX906-DENORM-CONTRACT
+; RUN: llc -march=amdgcn -mcpu=gfx900 -denormal-fp-math-f32=preserve-sign -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX900
+; RUN: llc -march=amdgcn -mcpu=gfx906 -denormal-fp-math-f32=preserve-sign -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GCN-DL-UNSAFE,GFX906-DL-UNSAFE
+; RUN: llc -march=amdgcn -mcpu=gfx1011 -denormal-fp-math-f32=preserve-sign -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GCN-DL-UNSAFE,GFX10-DL-UNSAFE,GFX10-CONTRACT
+; RUN: llc -march=amdgcn -mcpu=gfx1012 -denormal-fp-math-f32=preserve-sign -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GCN-DL-UNSAFE,GFX10-DL-UNSAFE,GFX10-CONTRACT
+; RUN: llc -march=amdgcn -mcpu=gfx906 -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX906
+; RUN: llc -march=amdgcn -mcpu=gfx906 -denormal-fp-math=preserve-sign -fp-contract=fast -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX906-CONTRACT
+; RUN: llc -march=amdgcn -mcpu=gfx906 -denormal-fp-math=ieee -fp-contract=fast -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX906-DENORM-CONTRACT
; (fadd (fmul S1.x, S2.x), (fadd (fmul (S1.y, S2.y), z))) -> (fdot2 S1, S2, z)
; Tests to make sure fdot2 is not generated when vector elements of dot-product expressions
diff --git a/llvm/test/CodeGen/AMDGPU/fma-combine.ll b/llvm/test/CodeGen/AMDGPU/fma-combine.ll
index 98af82f1d006..b624ddf7a6fd 100644
--- a/llvm/test/CodeGen/AMDGPU/fma-combine.ll
+++ b/llvm/test/CodeGen/AMDGPU/fma-combine.ll
@@ -1,6 +1,6 @@
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -mattr=-fp32-denormals -verify-machineinstrs -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefix=SI-NOFMA -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=verde -mattr=-fp32-denormals -verify-machineinstrs -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefix=SI-NOFMA -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs -fp-contract=fast -enable-no-infs-fp-math -enable-unsafe-fp-math -mattr=+fp32-denormals < %s | FileCheck -enable-var-scope -check-prefix=SI-FMA -check-prefix=SI-UNSAFE -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -denormal-fp-math-f32=preserve-sign -verify-machineinstrs -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefix=SI-NOFMA -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=verde -denormal-fp-math-f32=preserve-sign -verify-machineinstrs -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefix=SI-NOFMA -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -denormal-fp-math-f32=ieee -verify-machineinstrs -fp-contract=fast -enable-no-infs-fp-math -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefix=SI-FMA -check-prefix=SI-UNSAFE -check-prefix=SI -check-prefix=FUNC %s
; FIXME: Remove enable-unsafe-fp-math in RUN line and add flags to IR instrs
diff --git a/llvm/test/CodeGen/AMDGPU/fmaxnum.ll b/llvm/test/CodeGen/AMDGPU/fmaxnum.ll
index d5826abf55b1..b37bfbe30d59 100644
--- a/llvm/test/CodeGen/AMDGPU/fmaxnum.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmaxnum.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=amdgcn -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
; GCN-LABEL: {{^}}test_fmax_f32_ieee_mode_on:
; GCN: v_mul_f32_e64 [[QUIET0:v[0-9]+]], 1.0, s{{[0-9]+}}
@@ -205,7 +205,7 @@ define amdgpu_ps float @fmax_literal_var_f32_no_ieee(float inreg %a) #0 {
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
; GCN-NOT: v_max_f32
-define <3 x float> @test_func_fmax_v3f32(<3 x float> %a, <3 x float> %b) nounwind {
+define <3 x float> @test_func_fmax_v3f32(<3 x float> %a, <3 x float> %b) #0 {
%val = call <3 x float> @llvm.maxnum.v3f32(<3 x float> %a, <3 x float> %b) #0
ret <3 x float> %val
}
@@ -218,5 +218,5 @@ declare <8 x float> @llvm.maxnum.v8f32(<8 x float>, <8 x float>) #1
declare <16 x float> @llvm.maxnum.v16f32(<16 x float>, <16 x float>) #1
declare double @llvm.maxnum.f64(double, double)
-attributes #0 = { nounwind }
+attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/fminnum.ll b/llvm/test/CodeGen/AMDGPU/fminnum.ll
index e7f9880de494..84ed73a86437 100644
--- a/llvm/test/CodeGen/AMDGPU/fminnum.ll
+++ b/llvm/test/CodeGen/AMDGPU/fminnum.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=amdgcn -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
; GCN-LABEL: {{^}}test_fmin_f32_ieee_mode_on:
; GCN: v_mul_f32_e64 [[QUIET0:v[0-9]+]], 1.0, s{{[0-9]+}}
@@ -225,5 +225,5 @@ declare <4 x float> @llvm.minnum.v4f32(<4 x float>, <4 x float>) #1
declare <8 x float> @llvm.minnum.v8f32(<8 x float>, <8 x float>) #1
declare <16 x float> @llvm.minnum.v16f32(<16 x float>, <16 x float>) #1
-attributes #0 = { nounwind }
+attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll b/llvm/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll
index 00f97b8a52d6..c9be3952f01a 100644
--- a/llvm/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll
@@ -1,8 +1,8 @@
-; XUN: llc -mtriple=amdgcn-amd-amdhsa -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tonga -mattr=-fp32-denormals,+fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,SIVI,VI-DENORM %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tonga -mattr=-fp32-denormals,-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,SIVI,VI-FLUSH %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-fp32-denormals,+fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10,GFX8_10,GFX10-DENORM %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-fp32-denormals,-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10,GFX8_10,GFX10-FLUSH %s
+; XUN: llc -mtriple=amdgcn-amd-amdhsa -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tonga -denormal-fp-math-f32=preserve-sign -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,SIVI,VI-DENORM %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tonga -denormal-fp-math=preserve-sign -denormal-fp-math-f32=preserve-sign -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,SIVI,VI-FLUSH %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -denormal-fp-math-f32=preserve-sign -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10,GFX8_10,GFX10-DENORM %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -denormal-fp-math=preserve-sign -denormal-fp-math-f32=preserve-sign -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10,GFX8_10,GFX10-FLUSH %s
; Make sure (fmul (fadd x, x), c) -> (fmul x, (fmul 2.0, c)) doesn't
; make add an instruction if the fadd has more than one use.
diff --git a/llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll b/llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll
index 248cbe6ab5cc..58d61ccaf97b 100644
--- a/llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll
@@ -1,13 +1,13 @@
-; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-STRICT,VI-FLUSH,VI %s
-; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-CONTRACT,VI-FLUSH,VI %s
+; RUN: llc -march=amdgcn -mcpu=fiji -denormal-fp-math=preserve-sign -denormal-fp-math-f32=ieee -fp-contract=on -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-STRICT,VI-FLUSH,VI %s
+; RUN: llc -march=amdgcn -mcpu=fiji -denormal-fp-math=preserve-sign -denormal-fp-math-f32=ieee -fp-contract=fast -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-CONTRACT,VI-FLUSH,VI %s
-; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-STRICT,GCN-DENORM,GCN-DENORM-STRICT,VI-DENORM-STRICT,VI-DENORM,VI %s
-; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-CONTRACT,GCN-DENORM,GCN-DENORM-CONTRACT,VI-DENORM-CONTRACT,VI-DENORM,VI %s
+; RUN: llc -march=amdgcn -mcpu=fiji -denormal-fp-math=ieee -denormal-fp-math-f32=ieee -fp-contract=on -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-STRICT,GCN-DENORM,GCN-DENORM-STRICT,VI-DENORM-STRICT,VI-DENORM,VI %s
+; RUN: llc -march=amdgcn -mcpu=fiji -denormal-fp-math=ieee -denormal-fp-math-f32=ieee -fp-contract=fast -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-CONTRACT,GCN-DENORM,GCN-DENORM-CONTRACT,VI-DENORM-CONTRACT,VI-DENORM,VI %s
-; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-STRICT,GFX10-FLUSH,GFX10 %s
-; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-CONTRACT,GFX10-FLUSH,GFX10 %s
-; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=+fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-STRICT,GCN-DENORM,GCN-DENORM-STRICT,GFX10-DENORM-STRICT,GFX10-DENORM,GFX10 %s
-; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=+fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-CONTRACT,GCN-DENORM,GCN-DENORM-CONTRACT,GFX10-DENORM-CONTRACT,GFX10-DENORM,GFX10 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -denormal-fp-math=preserve-sign -denormal-fp-math-f32=ieee -fp-contract=on -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-STRICT,GFX10-FLUSH,GFX10 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -denormal-fp-math=preserve-sign -denormal-fp-math-f32=ieee -fp-contract=fast -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-CONTRACT,GFX10-FLUSH,GFX10 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -denormal-fp-math=ieee -denormal-fp-math-f32=ieee -fp-contract=on -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-STRICT,GCN-DENORM,GCN-DENORM-STRICT,GFX10-DENORM-STRICT,GFX10-DENORM,GFX10 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -denormal-fp-math=ieee -denormal-fp-math-f32=ieee -fp-contract=fast -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-CONTRACT,GCN-DENORM,GCN-DENORM-CONTRACT,GFX10-DENORM-CONTRACT,GFX10-DENORM,GFX10 %s
declare i32 @llvm.amdgcn.workitem.id.x() #1
declare half @llvm.fmuladd.f16(half, half, half) #1
diff --git a/llvm/test/CodeGen/AMDGPU/fmuladd.f32.ll b/llvm/test/CodeGen/AMDGPU/fmuladd.f32.ll
index 4ef293f9a51f..a6b374445cfb 100644
--- a/llvm/test/CodeGen/AMDGPU/fmuladd.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmuladd.f32.ll
@@ -1,21 +1,21 @@
-; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=tahiti -mattr=-fp32-denormals,+fast-fmaf -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-FLUSH,GCN-FLUSH-STRICT,GCN-FLUSH-MAD,GCN-FLUSH-FASTFMA,GCN-FLUSH-FASTFMA-STRICT,SI %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=tahiti -mattr=+fp32-denormals,+fast-fmaf -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-DENORM-STRICT,GCN-DENORM,SI-DENORM,GCN-DENORM-FASTFMA,GCN-DENORM-FASTFMA-STRICT,SI %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=verde -mattr=-fp32-denormals,-fast-fmaf -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-FLUSH,GCN-FLUSH-STRICT,GCN-FLUSH-MAD,SI-FLUSH,GCN-FLUSH-SLOWFMA,GCN-FLUSH-SLOWFMA-STRICT,SI %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=verde -mattr=+fp32-denormals,-fast-fmaf -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-DENORM-STRICT,GCN-DENORM,SI-DENORM,GCN-DENORM-SLOWFMA,GCN-DENORM-SLOWFMA-STRICT,SI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=tahiti -denormal-fp-math-f32=preserve-sign -mattr=+fast-fmaf -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-FLUSH,GCN-FLUSH-STRICT,GCN-FLUSH-MAD,GCN-FLUSH-FASTFMA,GCN-FLUSH-FASTFMA-STRICT,SI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=tahiti -denormal-fp-math-f32=ieee -mattr=+fast-fmaf -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-DENORM-STRICT,GCN-DENORM,SI-DENORM,GCN-DENORM-FASTFMA,GCN-DENORM-FASTFMA-STRICT,SI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=verde -denormal-fp-math-f32=preserve-sign -mattr=-fast-fmaf -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-FLUSH,GCN-FLUSH-STRICT,GCN-FLUSH-MAD,SI-FLUSH,GCN-FLUSH-SLOWFMA,GCN-FLUSH-SLOWFMA-STRICT,SI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=verde -denormal-fp-math-f32=ieee -mattr=-fast-fmaf -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-DENORM-STRICT,GCN-DENORM,SI-DENORM,GCN-DENORM-SLOWFMA,GCN-DENORM-SLOWFMA-STRICT,SI %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=tahiti -mattr=-fp32-denormals,+fast-fmaf -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-FLUSH,GCN-FLUSH-CONTRACT,GCN-FLUSH-MAD,SI-FLUSH,GCN-FLUSH-FASTFMA,GCN-FLUSH-FASTFMA-CONTRACT,SI %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=tahiti -mattr=+fp32-denormals,+fast-fmaf -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-DENORM-CONTRACT,GCN-DENORM,SI-DENORM,GCN-DENORM-FASTFMA,GCN-DENORM-FASTFMA-CONTRACT,SI %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=verde -mattr=-fp32-denormals,-fast-fmaf -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-FLUSH,GCN-FLUSH-CONTRACT,GCN-FLUSH-MAD,SI-FLUSH,GCN-FLUSH-SLOWFMA,GCN-FLUSH-SLOWFMA-CONTRACT,SI %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=verde -mattr=+fp32-denormals,-fast-fmaf -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-DENORM-CONTRACT,GCN-DENORM,SI-DENORM,GCN-DENORM-SLOWFMA,GCN-DENORM-SLOWFMA-CONTRACT,SI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=tahiti -denormal-fp-math-f32=preserve-sign -mattr=+fast-fmaf -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-FLUSH,GCN-FLUSH-CONTRACT,GCN-FLUSH-MAD,SI-FLUSH,GCN-FLUSH-FASTFMA,GCN-FLUSH-FASTFMA-CONTRACT,SI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=tahiti -denormal-fp-math-f32=ieee -mattr=+fast-fmaf -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-DENORM-CONTRACT,GCN-DENORM,SI-DENORM,GCN-DENORM-FASTFMA,GCN-DENORM-FASTFMA-CONTRACT,SI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=verde -denormal-fp-math-f32=preserve-sign -mattr=-fast-fmaf -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-FLUSH,GCN-FLUSH-CONTRACT,GCN-FLUSH-MAD,SI-FLUSH,GCN-FLUSH-SLOWFMA,GCN-FLUSH-SLOWFMA-CONTRACT,SI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=verde -denormal-fp-math-f32=ieee -mattr=-fast-fmaf -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-DENORM-CONTRACT,GCN-DENORM,SI-DENORM,GCN-DENORM-SLOWFMA,GCN-DENORM-SLOWFMA-CONTRACT,SI %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=gfx900 -mattr=-fp32-denormals -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-FLUSH,GCN-FLUSH-STRICT,GCN-FLUSH-MAD,GFX9-FLUSH,GCN-FLUSH-FASTFMA,GCN-FLUSH-FASTFMA-STRICT,GFX900 %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=gfx900 -mattr=+fp32-denormals -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-DENORM-STRICT,GCN-DENORM,GFX9-DENORM,GCN-DENORM-FASTFMA,GCN-DENORM-FASTFMA-STRICT,GFX900 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=gfx900 -denormal-fp-math-f32=preserve-sign -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-FLUSH,GCN-FLUSH-STRICT,GCN-FLUSH-MAD,GFX9-FLUSH,GCN-FLUSH-FASTFMA,GCN-FLUSH-FASTFMA-STRICT,GFX900 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=gfx900 -denormal-fp-math-f32=ieee -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-DENORM-STRICT,GCN-DENORM,GFX9-DENORM,GCN-DENORM-FASTFMA,GCN-DENORM-FASTFMA-STRICT,GFX900 %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=gfx906 -mattr=-fp32-denormals -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-FLUSH,GCN-FLUSH-STRICT,GCN-FLUSH-FMAC,GFX9-FLUSH,GCN-FLUSH-FASTFMA,GCN-FLUSH-FASTFMA-STRICT,GFX906 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=gfx906 -denormal-fp-math-f32=preserve-sign -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-FLUSH,GCN-FLUSH-STRICT,GCN-FLUSH-FMAC,GFX9-FLUSH,GCN-FLUSH-FASTFMA,GCN-FLUSH-FASTFMA-STRICT,GFX906 %s
; FIXME: Should probably test this, but sometimes selecting fmac is painful to match.
-; XUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=gfx906 -mattr=+fp32-denormals -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-DENORM-STRICT,GCN-DENORM,GFX9-DENORM,GCN-DENORM-FASTFMA,GCN-DENORM-FASTFMA-STRICT,GFX906 %s
+; XUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=gfx906 -denormal-fp-math-f32=ieee -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-DENORM-STRICT,GCN-DENORM,GFX9-DENORM,GCN-DENORM-FASTFMA,GCN-DENORM-FASTFMA-STRICT,GFX906 %s
; Test all permutations of: fp32 denormals, fast fp contract, fp contract enabled for fmuladd, fmaf fast/slow.
diff --git a/llvm/test/CodeGen/AMDGPU/fmuladd.v2f16.ll b/llvm/test/CodeGen/AMDGPU/fmuladd.v2f16.ll
index d02c399dd0bb..95ea266c6eb7 100644
--- a/llvm/test/CodeGen/AMDGPU/fmuladd.v2f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmuladd.v2f16.ll
@@ -1,12 +1,12 @@
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -mattr=-fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-FLUSH,GFX9 %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -mattr=-fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-FLUSH,GFX9 %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -mattr=-fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-FLUSH,GFX9 %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -mattr=-fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-FLUSH,GFX9 %s
-
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -mattr=+fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-DENORM-STRICT,GFX9-DENORM,GFX9 %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -mattr=+fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-DENORM-STRICT,GFX9-DENORM,GFX9 %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -mattr=+fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-DENORM-CONTRACT,GFX9-DENORM,GFX9 %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -mattr=+fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-DENORM-CONTRACT,GFX9-DENORM,GFX9 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -denormal-fp-math=preserve-sign -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-FLUSH,GFX9 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -denormal-fp-math=preserve-sign -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-FLUSH,GFX9 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -denormal-fp-math=preserve-sign -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-FLUSH,GFX9 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -denormal-fp-math=preserve-sign -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-FLUSH,GFX9 %s
+
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -denormal-fp-math=ieee -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-DENORM-STRICT,GFX9-DENORM,GFX9 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -denormal-fp-math=ieee -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-DENORM-STRICT,GFX9-DENORM,GFX9 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -denormal-fp-math=ieee -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-DENORM-CONTRACT,GFX9-DENORM,GFX9 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -denormal-fp-math=ieee -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-DENORM-CONTRACT,GFX9-DENORM,GFX9 %s
declare i32 @llvm.amdgcn.workitem.id.x() #1
declare <2 x half> @llvm.fmuladd.v2f16(<2 x half>, <2 x half>, <2 x half>) #1
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.ll
index bc2edbebbdac..108b5830f22a 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-combines.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.ll
@@ -1,8 +1,8 @@
-; RUN: llc -march=amdgcn -mcpu=hawaii -start-after=sink -mattr=-fp32-denormals,+flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-SAFE -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -enable-no-signed-zeros-fp-math -march=amdgcn -mcpu=hawaii -mattr=-fp32-denormals,+flat-for-global -start-after=sink -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-NSZ -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=hawaii -start-after=sink -mattr=+flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-SAFE -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -enable-no-signed-zeros-fp-math -march=amdgcn -mcpu=hawaii -mattr=+flat-for-global -start-after=sink -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-NSZ -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp32-denormals -start-after=sink --verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-SAFE -check-prefix=VI -check-prefix=FUNC %s
-; RUN: llc -enable-no-signed-zeros-fp-math -march=amdgcn -mcpu=fiji -mattr=-fp32-denormals -start-after=sink -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-NSZ -check-prefix=VI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=fiji -start-after=sink --verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-SAFE -check-prefix=VI -check-prefix=FUNC %s
+; RUN: llc -enable-no-signed-zeros-fp-math -march=amdgcn -mcpu=fiji -start-after=sink -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-NSZ -check-prefix=VI -check-prefix=FUNC %s
; --------------------------------------------------------------------------------
; fadd tests
@@ -2562,6 +2562,6 @@ declare float @llvm.amdgcn.fmul.legacy(float, float) #1
declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #0
declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #0
-attributes #0 = { nounwind }
+attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind "unsafe-fp-math"="true" }
diff --git a/llvm/test/CodeGen/AMDGPU/fpext-free.ll b/llvm/test/CodeGen/AMDGPU/fpext-free.ll
index 1bd4caa180a4..fd9843400ba5 100644
--- a/llvm/test/CodeGen/AMDGPU/fpext-free.ll
+++ b/llvm/test/CodeGen/AMDGPU/fpext-free.ll
@@ -1,7 +1,7 @@
-; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,GFX9,GFX9-F32FLUSH %s
-; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,GFX9,GFX9-F32DENORM %s
-; RUN: llc -march=amdgcn -mcpu=gfx803 -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,VI,VI-F32FLUSH %s
-; RUN: llc -march=amdgcn -mcpu=gfx803 -mattr=+fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,VI,VI-F32DENORM %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,GFX9,GFX9-F32FLUSH %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -denormal-fp-math-f32=ieee -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,GFX9,GFX9-F32DENORM %s
+; RUN: llc -march=amdgcn -mcpu=gfx803 -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,VI,VI-F32FLUSH %s
+; RUN: llc -march=amdgcn -mcpu=gfx803 -denormal-fp-math-f32=ieee -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,VI,VI-F32DENORM %s
; fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
diff --git a/llvm/test/CodeGen/AMDGPU/frem.ll b/llvm/test/CodeGen/AMDGPU/frem.ll
index 17e231ab94b6..bf4f8f8b02e0 100644
--- a/llvm/test/CodeGen/AMDGPU/frem.ll
+++ b/llvm/test/CodeGen/AMDGPU/frem.ll
@@ -1,6 +1,6 @@
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=bonaire -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=GCN -check-prefix=FUNC %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=GCN -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}frem_f32:
; GCN-DAG: buffer_load_dword [[X:v[0-9]+]], {{.*$}}
@@ -109,5 +109,5 @@ define amdgpu_kernel void @frem_v2f64(<2 x double> addrspace(1)* %out, <2 x doub
ret void
}
-attributes #0 = { nounwind "unsafe-fp-math"="false" }
-attributes #1 = { nounwind "unsafe-fp-math"="true" }
+attributes #0 = { nounwind "unsafe-fp-math"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
+attributes #1 = { nounwind "unsafe-fp-math"="true" "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
diff --git a/llvm/test/CodeGen/AMDGPU/hsa-fp-mode.ll b/llvm/test/CodeGen/AMDGPU/hsa-fp-mode.ll
index 1c501d2ef32b..0ad55502ac43 100644
--- a/llvm/test/CodeGen/AMDGPU/hsa-fp-mode.ll
+++ b/llvm/test/CodeGen/AMDGPU/hsa-fp-mode.ll
@@ -92,10 +92,10 @@ define amdgpu_kernel void @test_no_ieee_mode_no_dx10_clamp_vi(float addrspace(1)
attributes #0 = { nounwind "target-cpu"="kaveri" "target-features"="-code-object-v3" }
attributes #1 = { nounwind "target-cpu"="fiji" "target-features"="-code-object-v3" }
-attributes #2 = { nounwind "target-features"="-code-object-v3,-fp32-denormals,+fp64-fp16-denormals" }
-attributes #3 = { nounwind "target-features"="-code-object-v3,+fp32-denormals,-fp64-fp16-denormals" }
-attributes #4 = { nounwind "target-features"="-code-object-v3,+fp32-denormals,+fp64-fp16-denormals" }
-attributes #5 = { nounwind "target-features"="-code-object-v3,-fp32-denormals,-fp64-fp16-denormals" }
+attributes #2 = { nounwind "target-features"="-code-object-v3" "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
+attributes #3 = { nounwind "target-features"="-code-object-v3" "denormal-fp-math-f32"="ieee,ieee" "denormal-fp-math"="preserve-sign,preserve-sign" }
+attributes #4 = { nounwind "target-features"="-code-object-v3" "denormal-fp-math"="ieee,ieee" }
+attributes #5 = { nounwind "target-features"="-code-object-v3" "denormal-fp-math"="preserve-sign,preserve-sign" }
attributes #6 = { nounwind "amdgpu-dx10-clamp"="false" "target-cpu"="fiji" "target-features"="-code-object-v3" }
attributes #7 = { nounwind "amdgpu-ieee"="false" "target-cpu"="fiji" "target-features"="-code-object-v3" }
attributes #8 = { nounwind "amdgpu-dx10-clamp"="false" "amdgpu-ieee"="false" "target-cpu"="fiji" "target-features"="-code-object-v3" }
diff --git a/llvm/test/CodeGen/AMDGPU/known-never-snan.ll b/llvm/test/CodeGen/AMDGPU/known-never-snan.ll
index 01981952ce68..ecd71887f8e3 100644
--- a/llvm/test/CodeGen/AMDGPU/known-never-snan.ll
+++ b/llvm/test/CodeGen/AMDGPU/known-never-snan.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
; Mostly overlaps with fmed3.ll to stress specific cases of
; isKnownNeverSNaN.
@@ -667,7 +667,7 @@ declare float @llvm.amdgcn.rsq.f32(float) #1
declare float @llvm.amdgcn.fract.f32(float) #1
declare float @llvm.amdgcn.cubeid(float, float, float) #0
-attributes #0 = { nounwind }
+attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
attributes #1 = { nounwind readnone speculatable }
!0 = !{float 2.500000e+00}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmad.ftz.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmad.ftz.f16.ll
index 000353ee1307..cd9c47a57c52 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmad.ftz.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmad.ftz.f16.ll
@@ -1,6 +1,6 @@
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX8 %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX8 %s
-; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
+; RUN: llc -march=amdgcn -mcpu=tonga -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX8 %s
+; RUN: llc -march=amdgcn -mcpu=tonga -denormal-fp-math-f32=ieee -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX8 %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -denormal-fp-math-f32=ieee -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
declare half @llvm.amdgcn.fmad.ftz.f16(half %a, half %b, half %c)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmad.ftz.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmad.ftz.ll
index ca35fbcb0182..38d1212a70cc 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmad.ftz.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmad.ftz.ll
@@ -1,7 +1,7 @@
-; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
-; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tonga -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tonga -denormal-fp-math-f32=ieee -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -denormal-fp-math-f32=ieee -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
declare float @llvm.amdgcn.fmad.ftz.f32(float %a, float %b, float %c)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll
index a3c08038b87b..a5585d5d09d3 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll
@@ -139,9 +139,9 @@ define amdgpu_kernel void @unsafe_rsq_rcp_pat_f64(double addrspace(1)* %out, dou
}
attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind "unsafe-fp-math"="false" "target-features"="-fp32-denormals" }
-attributes #2 = { nounwind "unsafe-fp-math"="true" "target-features"="-fp32-denormals" }
-attributes #3 = { nounwind "unsafe-fp-math"="false" "target-features"="+fp32-denormals" }
-attributes #4 = { nounwind "unsafe-fp-math"="true" "target-features"="+fp32-denormals" }
+attributes #1 = { nounwind "unsafe-fp-math"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
+attributes #2 = { nounwind "unsafe-fp-math"="true" "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
+attributes #3 = { nounwind "unsafe-fp-math"="false" "denormal-fp-math-f32"="ieee,ieee" }
+attributes #4 = { nounwind "unsafe-fp-math"="true" "denormal-fp-math-f32"="ieee,ieee" }
!0 = !{float 2.500000e+00}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll
index 4836c1dca875..9e1a7b62d03c 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll
@@ -1,9 +1,9 @@
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -mattr=-fp32-denormals,-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI -check-prefix=SI-FLUSH %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-fp32-denormals,-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI -check-prefix=VI-FLUSH %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -mattr=-fp32-denormals,+fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI -check-prefix=SI-DENORM %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-fp32-denormals,+fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI -check-prefix=VI-DENORM %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx1010 -mattr=-fp32-denormals,-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX10 -check-prefix=GFX10-FLUSH %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx1010 -mattr=-fp32-denormals,+fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX10 -check-prefix=GFX10-DENORM %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -denormal-fp-math=preserve-sign -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI -check-prefix=SI-FLUSH %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -denormal-fp-math=preserve-sign -denormal-fp-math-f32=preserve-sign -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI -check-prefix=VI-FLUSH %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -denormal-fp-math=ieee -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI -check-prefix=SI-DENORM %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -denormal-fp-math=ieee -denormal-fp-math-f32=preserve-sign -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI -check-prefix=VI-DENORM %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx1010 -denormal-fp-math=preserve-sign -denormal-fp-math-f32=preserve-sign -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX10 -check-prefix=GFX10-FLUSH %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx1010 -denormal-fp-math=ieee -denormal-fp-math-f32=preserve-sign -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX10 -check-prefix=GFX10-DENORM %s
declare half @llvm.fmuladd.f16(half %a, half %b, half %c)
declare <2 x half> @llvm.fmuladd.v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll
index 082c4530edce..14ca8225273d 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn-- -mcpu=tahiti -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=SI %s
-; RUN: llc -mtriple=amdgcn-- -mcpu=fiji -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI,SIVI %s
-; RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9 %s
+; RUN: llc -mtriple=amdgcn-- -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=SI %s
+; RUN: llc -mtriple=amdgcn-- -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI,SIVI %s
+; RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9 %s
declare half @llvm.maxnum.f16(half %a, half %b)
declare <2 x half> @llvm.maxnum.v2f16(<2 x half> %a, <2 x half> %b)
@@ -88,7 +88,7 @@ define amdgpu_kernel void @maxnum_f16(
; GFX9-NEXT: s_endpgm
half addrspace(1)* %r,
half addrspace(1)* %a,
- half addrspace(1)* %b) {
+ half addrspace(1)* %b) #0 {
entry:
%a.val = load volatile half, half addrspace(1)* %a
%b.val = load volatile half, half addrspace(1)* %b
@@ -157,7 +157,7 @@ define amdgpu_kernel void @maxnum_f16_imm_a(
; GFX9-NEXT: buffer_store_short v0, off, s[0:3], 0
; GFX9-NEXT: s_endpgm
half addrspace(1)* %r,
- half addrspace(1)* %b) {
+ half addrspace(1)* %b) #0 {
entry:
%b.val = load half, half addrspace(1)* %b
%r.val = call half @llvm.maxnum.f16(half 3.0, half %b.val)
@@ -225,7 +225,7 @@ define amdgpu_kernel void @maxnum_f16_imm_b(
; GFX9-NEXT: buffer_store_short v0, off, s[0:3], 0
; GFX9-NEXT: s_endpgm
half addrspace(1)* %r,
- half addrspace(1)* %a) {
+ half addrspace(1)* %a) #0 {
entry:
%a.val = load half, half addrspace(1)* %a
%r.val = call half @llvm.maxnum.f16(half %a.val, half 4.0)
@@ -308,7 +308,7 @@ define amdgpu_kernel void @maxnum_v2f16(
; GFX9-NEXT: s_endpgm
<2 x half> addrspace(1)* %r,
<2 x half> addrspace(1)* %a,
- <2 x half> addrspace(1)* %b) {
+ <2 x half> addrspace(1)* %b) #0 {
entry:
%a.val = load <2 x half>, <2 x half> addrspace(1)* %a
%b.val = load <2 x half>, <2 x half> addrspace(1)* %b
@@ -376,7 +376,7 @@ define amdgpu_kernel void @maxnum_v2f16_imm_a(
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX9-NEXT: s_endpgm
<2 x half> addrspace(1)* %r,
- <2 x half> addrspace(1)* %b) {
+ <2 x half> addrspace(1)* %b) #0 {
entry:
%b.val = load <2 x half>, <2 x half> addrspace(1)* %b
%r.val = call <2 x half> @llvm.maxnum.v2f16(<2 x half> <half 3.0, half 4.0>, <2 x half> %b.val)
@@ -443,7 +443,7 @@ define amdgpu_kernel void @maxnum_v2f16_imm_b(
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX9-NEXT: s_endpgm
<2 x half> addrspace(1)* %r,
- <2 x half> addrspace(1)* %a) {
+ <2 x half> addrspace(1)* %a) #0 {
entry:
%a.val = load <2 x half>, <2 x half> addrspace(1)* %a
%r.val = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %a.val, <2 x half> <half 4.0, half 3.0>)
@@ -542,7 +542,7 @@ define amdgpu_kernel void @maxnum_v3f16(
; GFX9-NEXT: s_endpgm
<3 x half> addrspace(1)* %r,
<3 x half> addrspace(1)* %a,
- <3 x half> addrspace(1)* %b) {
+ <3 x half> addrspace(1)* %b) #0 {
entry:
%a.val = load <3 x half>, <3 x half> addrspace(1)* %a
%b.val = load <3 x half>, <3 x half> addrspace(1)* %b
@@ -655,7 +655,7 @@ define amdgpu_kernel void @maxnum_v4f16(
; GFX9-NEXT: s_endpgm
<4 x half> addrspace(1)* %r,
<4 x half> addrspace(1)* %a,
- <4 x half> addrspace(1)* %b) {
+ <4 x half> addrspace(1)* %b) #0 {
entry:
%a.val = load <4 x half>, <4 x half> addrspace(1)* %a
%b.val = load <4 x half>, <4 x half> addrspace(1)* %b
@@ -746,10 +746,12 @@ define amdgpu_kernel void @fmax_v4f16_imm_a(
; GFX9-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; GFX9-NEXT: s_endpgm
<4 x half> addrspace(1)* %r,
- <4 x half> addrspace(1)* %b) {
+ <4 x half> addrspace(1)* %b) #0 {
entry:
%b.val = load <4 x half>, <4 x half> addrspace(1)* %b
%r.val = call <4 x half> @llvm.maxnum.v4f16(<4 x half> <half 8.0, half 2.0, half 3.0, half 4.0>, <4 x half> %b.val)
store <4 x half> %r.val, <4 x half> addrspace(1)* %r
ret void
}
+
+attributes #0 = { "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll
index 4d8169a1c9ea..c28fd3abcbad 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn-- -mcpu=tahiti -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=SI %s
-; RUN: llc -mtriple=amdgcn-- -mcpu=fiji -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI %s
-; RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9 %s
+; RUN: llc -mtriple=amdgcn-- -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=SI %s
+; RUN: llc -mtriple=amdgcn-- -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI %s
+; RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9 %s
declare half @llvm.minnum.f16(half %a, half %b)
declare <2 x half> @llvm.minnum.v2f16(<2 x half> %a, <2 x half> %b)
@@ -88,7 +88,7 @@ define amdgpu_kernel void @minnum_f16_ieee(
; GFX9-NEXT: s_endpgm
half addrspace(1)* %r,
half addrspace(1)* %a,
- half addrspace(1)* %b) {
+ half addrspace(1)* %b) #0 {
entry:
%a.val = load volatile half, half addrspace(1)* %a
%b.val = load volatile half, half addrspace(1)* %b
@@ -97,7 +97,7 @@ entry:
ret void
}
-define amdgpu_ps half @minnum_f16_no_ieee(half %a, half %b) {
+define amdgpu_ps half @minnum_f16_no_ieee(half %a, half %b) #0 {
; SI-LABEL: minnum_f16_no_ieee:
; SI: ; %bb.0:
; SI-NEXT: v_cvt_f16_f32_e32 v1, v1
@@ -180,7 +180,7 @@ define amdgpu_kernel void @minnum_f16_imm_a(
; GFX9-NEXT: buffer_store_short v0, off, s[0:3], 0
; GFX9-NEXT: s_endpgm
half addrspace(1)* %r,
- half addrspace(1)* %b) {
+ half addrspace(1)* %b) #0 {
entry:
%b.val = load half, half addrspace(1)* %b
%r.val = call half @llvm.minnum.f16(half 3.0, half %b.val)
@@ -248,7 +248,7 @@ define amdgpu_kernel void @minnum_f16_imm_b(
; GFX9-NEXT: buffer_store_short v0, off, s[0:3], 0
; GFX9-NEXT: s_endpgm
half addrspace(1)* %r,
- half addrspace(1)* %a) {
+ half addrspace(1)* %a) #0 {
entry:
%a.val = load half, half addrspace(1)* %a
%r.val = call half @llvm.minnum.f16(half %a.val, half 4.0)
@@ -331,7 +331,7 @@ define amdgpu_kernel void @minnum_v2f16_ieee(
; GFX9-NEXT: s_endpgm
<2 x half> addrspace(1)* %r,
<2 x half> addrspace(1)* %a,
- <2 x half> addrspace(1)* %b) {
+ <2 x half> addrspace(1)* %b) #0 {
entry:
%a.val = load <2 x half>, <2 x half> addrspace(1)* %a
%b.val = load <2 x half>, <2 x half> addrspace(1)* %b
@@ -340,7 +340,7 @@ entry:
ret void
}
-define amdgpu_ps <2 x half> @minnum_v2f16_no_ieee(<2 x half> %a, <2 x half> %b) {
+define amdgpu_ps <2 x half> @minnum_v2f16_no_ieee(<2 x half> %a, <2 x half> %b) #0 {
; SI-LABEL: minnum_v2f16_no_ieee:
; SI: ; %bb.0:
; SI-NEXT: v_cvt_f16_f32_e32 v3, v3
@@ -429,7 +429,7 @@ define amdgpu_kernel void @minnum_v2f16_imm_a(
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX9-NEXT: s_endpgm
<2 x half> addrspace(1)* %r,
- <2 x half> addrspace(1)* %b) {
+ <2 x half> addrspace(1)* %b) #0 {
entry:
%b.val = load <2 x half>, <2 x half> addrspace(1)* %b
%r.val = call <2 x half> @llvm.minnum.v2f16(<2 x half> <half 3.0, half 4.0>, <2 x half> %b.val)
@@ -496,7 +496,7 @@ define amdgpu_kernel void @minnum_v2f16_imm_b(
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX9-NEXT: s_endpgm
<2 x half> addrspace(1)* %r,
- <2 x half> addrspace(1)* %a) {
+ <2 x half> addrspace(1)* %a) #0 {
entry:
%a.val = load <2 x half>, <2 x half> addrspace(1)* %a
%r.val = call <2 x half> @llvm.minnum.v2f16(<2 x half> %a.val, <2 x half> <half 4.0, half 3.0>)
@@ -595,7 +595,7 @@ define amdgpu_kernel void @minnum_v3f16(
; GFX9-NEXT: s_endpgm
<3 x half> addrspace(1)* %r,
<3 x half> addrspace(1)* %a,
- <3 x half> addrspace(1)* %b) {
+ <3 x half> addrspace(1)* %b) #0 {
entry:
%a.val = load <3 x half>, <3 x half> addrspace(1)* %a
%b.val = load <3 x half>, <3 x half> addrspace(1)* %b
@@ -708,7 +708,7 @@ define amdgpu_kernel void @minnum_v4f16(
; GFX9-NEXT: s_endpgm
<4 x half> addrspace(1)* %r,
<4 x half> addrspace(1)* %a,
- <4 x half> addrspace(1)* %b) {
+ <4 x half> addrspace(1)* %b) #0 {
entry:
%a.val = load <4 x half>, <4 x half> addrspace(1)* %a
%b.val = load <4 x half>, <4 x half> addrspace(1)* %b
@@ -799,10 +799,12 @@ define amdgpu_kernel void @fmin_v4f16_imm_a(
; GFX9-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; GFX9-NEXT: s_endpgm
<4 x half> addrspace(1)* %r,
- <4 x half> addrspace(1)* %b) {
+ <4 x half> addrspace(1)* %b) #0 {
entry:
%b.val = load <4 x half>, <4 x half> addrspace(1)* %b
%r.val = call <4 x half> @llvm.minnum.v4f16(<4 x half> <half 8.0, half 2.0, half 3.0, half 4.0>, <4 x half> %b.val)
store <4 x half> %r.val, <4 x half> addrspace(1)* %r
ret void
}
+
+attributes #0 = { "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
diff --git a/llvm/test/CodeGen/AMDGPU/mad-combine.ll b/llvm/test/CodeGen/AMDGPU/mad-combine.ll
index a46aff7c6f68..c90970a93f9f 100644
--- a/llvm/test/CodeGen/AMDGPU/mad-combine.ll
+++ b/llvm/test/CodeGen/AMDGPU/mad-combine.ll
@@ -1,14 +1,14 @@
; Make sure we still form mad even when unsafe math or fp-contract is allowed instead of fma.
-; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-STD -check-prefix=SI-STD-SAFE -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=-fp32-denormals -verify-machineinstrs -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-STD -check-prefix=SI-STD-SAFE -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=-fp32-denormals -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-STD -check-prefix=SI-STD-UNSAFE -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-STD -check-prefix=SI-STD-SAFE -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -denormal-fp-math-f32=preserve-sign -verify-machineinstrs -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-STD -check-prefix=SI-STD-SAFE -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -denormal-fp-math-f32=preserve-sign -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-STD -check-prefix=SI-STD-UNSAFE -check-prefix=FUNC %s
; FIXME: Remove enable-unsafe-fp-math in RUN line and add flags to IR instrs
; Make sure we don't form mad with denormals
-; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=+fp32-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-DENORM -check-prefix=SI-DENORM-FASTFMAF -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=verde -mattr=+fp32-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-DENORM -check-prefix=SI-DENORM-SLOWFMAF -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -denormal-fp-math-f32=ieee -fp-contract=fast -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-DENORM -check-prefix=SI-DENORM-FASTFMAF -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=verde -denormal-fp-math-f32=ieee -fp-contract=fast -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-DENORM -check-prefix=SI-DENORM-SLOWFMAF -check-prefix=FUNC %s
declare i32 @llvm.amdgcn.workitem.id.x() #0
declare float @llvm.fabs.f32(float) #0
diff --git a/llvm/test/CodeGen/AMDGPU/mad-mix-hi.ll b/llvm/test/CodeGen/AMDGPU/mad-mix-hi.ll
index 4d9607dfefb5..f20e29f17849 100644
--- a/llvm/test/CodeGen/AMDGPU/mad-mix-hi.ll
+++ b/llvm/test/CodeGen/AMDGPU/mad-mix-hi.ll
@@ -143,5 +143,5 @@ declare float @llvm.maxnum.f32(float, float) #1
declare float @llvm.fmuladd.f32(float, float, float) #1
declare <2 x float> @llvm.fmuladd.v2f32(<2 x float>, <2 x float>, <2 x float>) #1
-attributes #0 = { nounwind "target-features"="-fp32-denormals" }
+attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
attributes #1 = { nounwind readnone speculatable }
diff --git a/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll b/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll
index 759b5aec2e44..db2ed78a15f0 100644
--- a/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll
+++ b/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll
@@ -310,5 +310,5 @@ declare <2 x float> @llvm.fmuladd.v2f32(<2 x float>, <2 x float>, <2 x float>) #
declare <3 x float> @llvm.fmuladd.v3f32(<3 x float>, <3 x float>, <3 x float>) #1
declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>) #1
-attributes #0 = { nounwind "target-features"="-fp32-denormals" }
+attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
attributes #1 = { nounwind readnone speculatable }
diff --git a/llvm/test/CodeGen/AMDGPU/mad-mix.ll b/llvm/test/CodeGen/AMDGPU/mad-mix.ll
index 0116f49d8bce..a7126121f09f 100644
--- a/llvm/test/CodeGen/AMDGPU/mad-mix.ll
+++ b/llvm/test/CodeGen/AMDGPU/mad-mix.ll
@@ -557,6 +557,6 @@ declare float @llvm.maxnum.f32(float, float) #2
declare float @llvm.fmuladd.f32(float, float, float) #2
declare <2 x float> @llvm.fmuladd.v2f32(<2 x float>, <2 x float>, <2 x float>) #2
-attributes #0 = { nounwind "target-features"="-fp32-denormals" }
-attributes #1 = { nounwind "target-features"="+fp32-denormals" }
+attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
+attributes #1 = { nounwind "denormal-fp-math-f32"="ieee,ieee" }
attributes #2 = { nounwind readnone speculatable }
diff --git a/llvm/test/CodeGen/AMDGPU/madak.ll b/llvm/test/CodeGen/AMDGPU/madak.ll
index 8b01c04740b8..c92244b4f990 100644
--- a/llvm/test/CodeGen/AMDGPU/madak.ll
+++ b/llvm/test/CodeGen/AMDGPU/madak.ll
@@ -1,8 +1,8 @@
-; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX6,GFX6_8_9,MAD %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8,GFX6_8_9,GFX8_9,GFX8_9_10,MAD %s
-; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-fp32-denormals -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck -check-prefixes=GCN,GFX9,GFX6_8_9,GFX8_9,GFX8_9_10,MAD %s
-; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-fp32-denormals -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck -check-prefixes=GCN,GFX10,GFX8_9_10,GFX10-MAD %s
-; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-fp32-denormals -verify-machineinstrs -fp-contract=fast -amdgpu-enable-global-sgpr-addr < %s | FileCheck -check-prefixes=GCN,GFX10,GFX8_9_10,FMA %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX6,GFX6_8_9,MAD %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8,GFX6_8_9,GFX8_9,GFX8_9_10,MAD %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck -check-prefixes=GCN,GFX9,GFX6_8_9,GFX8_9,GFX8_9_10,MAD %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck -check-prefixes=GCN,GFX10,GFX8_9_10,GFX10-MAD %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -fp-contract=fast -amdgpu-enable-global-sgpr-addr < %s | FileCheck -check-prefixes=GCN,GFX10,GFX8_9_10,FMA %s
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
declare float @llvm.fabs.f32(float) nounwind readnone
@@ -19,7 +19,7 @@ declare float @llvm.fabs.f32(float) nounwind readnone
; MAD: v_madak_f32 {{v[0-9]+}}, [[VA]], [[VB]], 0x41200000
; GFX10-MAD: v_madak_f32 {{v[0-9]+}}, [[VA]], [[VB]], 0x41200000
; FMA: v_fmaak_f32 {{v[0-9]+}}, [[VA]], [[VB]], 0x41200000
-define amdgpu_kernel void @madak_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind {
+define amdgpu_kernel void @madak_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) #0 {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
%in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
@@ -53,7 +53,7 @@ define amdgpu_kernel void @madak_f32(float addrspace(1)* noalias %out, float add
; MAD-DAG: v_mac_f32_e32 [[VK]], [[VA]], [[VC]]
; FMA-DAG: v_fmac_f32_e32 [[VK]], [[VA]], [[VC]]
; GCN: s_endpgm
-define amdgpu_kernel void @madak_2_use_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
+define amdgpu_kernel void @madak_2_use_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #0 {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%in.gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
@@ -82,7 +82,7 @@ define amdgpu_kernel void @madak_2_use_f32(float addrspace(1)* noalias %out, flo
; MAD: v_madak_f32 {{v[0-9]+}}, 4.0, [[VA]], 0x41200000
; GFX10-MAD: v_madak_f32 {{v[0-9]+}}, 4.0, [[VA]], 0x41200000
; FMA: v_fmaak_f32 {{v[0-9]+}}, 4.0, [[VA]], 0x41200000
-define amdgpu_kernel void @madak_m_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a) nounwind {
+define amdgpu_kernel void @madak_m_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a) #0 {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
@@ -110,7 +110,7 @@ define amdgpu_kernel void @madak_m_inline_imm_f32(float addrspace(1)* noalias %o
; MAD: v_mad_f32 {{v[0-9]+}}, [[VA]], [[VB]], 4.0
; GFX10-MAD: v_mad_f32 {{v[0-9]+}}, [[VA]], [[VB]], 4.0
; FMA: v_fma_f32 {{v[0-9]+}}, [[VA]], [[VB]], 4.0
-define amdgpu_kernel void @madak_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind {
+define amdgpu_kernel void @madak_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) #0 {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
%in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
@@ -134,7 +134,7 @@ define amdgpu_kernel void @madak_inline_imm_f32(float addrspace(1)* noalias %out
; GFX6_8_9: v_mac_f32_e32 [[VK]], [[SB]], [[VA]]
; GFX10-MAD: v_mad_f32 v{{[0-9]+}}, [[VA]], [[SB]], 0x41200000
; FMA: v_fma_f32 v{{[0-9]+}}, [[VA]], [[SB]], 0x41200000
-define amdgpu_kernel void @s_v_madak_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float %b) nounwind {
+define amdgpu_kernel void @s_v_madak_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float %b) #0 {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
@@ -155,7 +155,7 @@ define amdgpu_kernel void @s_v_madak_f32(float addrspace(1)* noalias %out, float
; GFX6_8_9: v_mac_f32_e32 [[VK]], [[SB]], [[VA]]
; GFX10-MAD: v_madak_f32 v{{[0-9]+}}, [[SB]], [[VA]], 0x41200000
; FMA: v_fmaak_f32 v{{[0-9]+}}, [[SB]], [[VA]], 0x41200000
-define amdgpu_kernel void @v_s_madak_f32(float addrspace(1)* noalias %out, float %a, float addrspace(1)* noalias %in.b) nounwind {
+define amdgpu_kernel void @v_s_madak_f32(float addrspace(1)* noalias %out, float %a, float addrspace(1)* noalias %in.b) #0 {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
@@ -173,7 +173,7 @@ define amdgpu_kernel void @v_s_madak_f32(float addrspace(1)* noalias %out, float
; GFX8_9: v_mac_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
; GFX10-MAD: v_mac_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
; FMA: v_fmac_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
-define amdgpu_kernel void @s_s_madak_f32(float addrspace(1)* %out, float %a, float %b) nounwind {
+define amdgpu_kernel void @s_s_madak_f32(float addrspace(1)* %out, float %a, float %b) #0 {
%mul = fmul float %a, %b
%madak = fadd float %mul, 10.0
store float %madak, float addrspace(1)* %out, align 4
@@ -189,7 +189,7 @@ define amdgpu_kernel void @s_s_madak_f32(float addrspace(1)* %out, float %a, flo
; GFX10-MAD: v_mad_f32 {{v[0-9]+}}, |{{v[0-9]+}}|, {{v[0-9]+}}, 0x41200000
; FMA: v_fma_f32 {{v[0-9]+}}, |{{v[0-9]+}}|, {{v[0-9]+}}, 0x41200000
; GCN: s_endpgm
-define amdgpu_kernel void @no_madak_src0_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind {
+define amdgpu_kernel void @no_madak_src0_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) #0 {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
%in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
@@ -215,7 +215,7 @@ define amdgpu_kernel void @no_madak_src0_modifier_f32(float addrspace(1)* noalia
; GFX10-MAD: v_mad_f32 {{v[0-9]+}}, {{v[0-9]+}}, |{{v[0-9]+}}|, 0x41200000
; FMA: v_fma_f32 {{v[0-9]+}}, {{v[0-9]+}}, |{{v[0-9]+}}|, 0x41200000
; GCN: s_endpgm
-define amdgpu_kernel void @no_madak_src1_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind {
+define amdgpu_kernel void @no_madak_src1_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) #0 {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
%in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
@@ -265,4 +265,4 @@ bb4:
ret void
}
-attributes #0 = { nounwind}
+attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
diff --git a/llvm/test/CodeGen/AMDGPU/madmk.ll b/llvm/test/CodeGen/AMDGPU/madmk.ll
index b78a116c2107..0939ba183256 100644
--- a/llvm/test/CodeGen/AMDGPU/madmk.ll
+++ b/llvm/test/CodeGen/AMDGPU/madmk.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=amdgcn -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
-; XUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
+; XUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; FIXME: None of these trigger madmk emission anymore. It is still
; possible, but requires the correct registers to be used which is
@@ -12,7 +12,7 @@ declare float @llvm.fabs.f32(float) nounwind readnone
; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; GCN: v_mac_f32_e32 [[VB]], 0x41200000, [[VA]]
-define amdgpu_kernel void @madmk_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
+define amdgpu_kernel void @madmk_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #0 {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
@@ -35,7 +35,7 @@ define amdgpu_kernel void @madmk_f32(float addrspace(1)* noalias %out, float add
; GCN-DAG: v_mac_f32_e32 [[VB]], [[SK]], [[VA]]
; GCN-DAG: v_mac_f32_e32 [[VC]], [[SK]], [[VA]]
; GCN: s_endpgm
-define amdgpu_kernel void @madmk_2_use_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
+define amdgpu_kernel void @madmk_2_use_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #0 {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%in.gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
@@ -64,7 +64,7 @@ define amdgpu_kernel void @madmk_2_use_f32(float addrspace(1)* noalias %out, flo
; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; GCN: v_mac_f32_e32 [[VB]], 4.0, [[VA]]
-define amdgpu_kernel void @madmk_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
+define amdgpu_kernel void @madmk_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #0 {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
@@ -83,7 +83,7 @@ define amdgpu_kernel void @madmk_inline_imm_f32(float addrspace(1)* noalias %out
; GCN-NOT: v_madmk_f32
; GCN: v_mac_f32_e32
; GCN: s_endpgm
-define amdgpu_kernel void @s_s_madmk_f32(float addrspace(1)* noalias %out, [8 x i32], float %a, [8 x i32], float %b) nounwind {
+define amdgpu_kernel void @s_s_madmk_f32(float addrspace(1)* noalias %out, [8 x i32], float %a, [8 x i32], float %b) #0 {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
@@ -99,7 +99,7 @@ define amdgpu_kernel void @s_s_madmk_f32(float addrspace(1)* noalias %out, [8 x
; GCN: v_mov_b32_e32 [[VREG2:v[0-9]+]], [[SREG]]
; GCN: v_mac_f32_e32 [[VREG2]], 0x41200000, [[VREG1]]
; GCN: s_endpgm
-define amdgpu_kernel void @v_s_madmk_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in, float %b) nounwind {
+define amdgpu_kernel void @v_s_madmk_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in, float %b) #0 {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
@@ -115,7 +115,7 @@ define amdgpu_kernel void @v_s_madmk_f32(float addrspace(1)* noalias %out, float
; GCN-NOT: v_madmk_f32
; GCN: v_mac_f32_e32
; GCN: s_endpgm
-define amdgpu_kernel void @scalar_vector_madmk_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in, float %a) nounwind {
+define amdgpu_kernel void @scalar_vector_madmk_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in, float %a) #0 {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
@@ -132,7 +132,7 @@ define amdgpu_kernel void @scalar_vector_madmk_f32(float addrspace(1)* noalias %
; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; GCN-DAG: s_mov_b32 [[SK:s[0-9]+]], 0x41200000
; GCN: v_mad_f32 {{v[0-9]+}}, |[[VA]]|, [[SK]], [[VB]]
-define amdgpu_kernel void @no_madmk_src0_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
+define amdgpu_kernel void @no_madmk_src0_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #0 {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
@@ -153,7 +153,7 @@ define amdgpu_kernel void @no_madmk_src0_modifier_f32(float addrspace(1)* noalia
; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; GCN: v_mad_f32 {{v[0-9]+}}, {{v[0-9]+}}, {{[sv][0-9]+}}, |{{v[0-9]+}}|
-define amdgpu_kernel void @no_madmk_src2_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
+define amdgpu_kernel void @no_madmk_src2_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #0 {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
@@ -174,7 +174,7 @@ define amdgpu_kernel void @no_madmk_src2_modifier_f32(float addrspace(1)* noalia
; GCN: buffer_load_dword [[A:v[0-9]+]]
; GCN: s_mov_b32 [[SK:s[0-9]+]], 0x41200000
; GCN: v_mad_f32 {{v[0-9]+}}, [[A]], [[SK]], 2.0
-define amdgpu_kernel void @madmk_add_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
+define amdgpu_kernel void @madmk_add_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #0 {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
@@ -191,7 +191,7 @@ define amdgpu_kernel void @madmk_add_inline_imm_f32(float addrspace(1)* noalias
; SI: s_or_b64
; SI: s_xor_b64
; SI: v_mac_f32_e32 {{v[0-9]+}}, 0x472aee8c, {{v[0-9]+}}
-define amdgpu_kernel void @kill_madmk_verifier_error() nounwind {
+define amdgpu_kernel void @kill_madmk_verifier_error() #0 {
bb:
br label %bb2
@@ -214,4 +214,5 @@ bb6: ; preds = %bb2
declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
+attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll b/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll
index 64c4f1a6fb7b..ffba105f793b 100644
--- a/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll
+++ b/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s
; Make sure that AMDGPUCodeGenPrepare introduces mul24 intrinsics
; after SLSR, as the intrinsics would interfere. It's unclear if these
@@ -254,7 +254,7 @@ define void @slsr1_1(i32 %b.arg, i32 %s.arg) #0 {
declare void @foo(i32) #0
declare float @llvm.fmuladd.f32(float, float, float) #1
-attributes #0 = { nounwind willreturn }
+attributes #0 = { nounwind willreturn "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
attributes #1 = { nounwind readnone speculatable }
!0 = !{float 2.500000e+00}
diff --git a/llvm/test/CodeGen/AMDGPU/omod.ll b/llvm/test/CodeGen/AMDGPU/omod.ll
index 52f2d948a04b..bd9996e1e229 100644
--- a/llvm/test/CodeGen/AMDGPU/omod.ll
+++ b/llvm/test/CodeGen/AMDGPU/omod.ll
@@ -275,10 +275,10 @@ declare half @llvm.minnum.f16(half, half) #1
declare half @llvm.maxnum.f16(half, half) #1
declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
-attributes #0 = { nounwind "target-features"="-fp32-denormals" "no-signed-zeros-fp-math"="true" }
+attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-signed-zeros-fp-math"="true" }
attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind "target-features"="+fp32-denormals" "no-signed-zeros-fp-math"="true" }
-attributes #3 = { nounwind "target-features"="-fp64-fp16-denormals" "no-signed-zeros-fp-math"="true" }
+attributes #2 = { nounwind "denormal-fp-math-f32"="ieee,ieee" "no-signed-zeros-fp-math"="true" }
+attributes #3 = { nounwind "denormal-fp-math"="preserve-sign,preserve-sign" "no-signed-zeros-fp-math"="true" }
attributes #4 = { nounwind "no-signed-zeros-fp-math"="false" }
!llvm.dbg.cu = !{!0}
diff --git a/llvm/test/CodeGen/AMDGPU/operand-folding.ll b/llvm/test/CodeGen/AMDGPU/operand-folding.ll
index 0bdd692c1bcb..38aeaa8cb9c4 100644
--- a/llvm/test/CodeGen/AMDGPU/operand-folding.ll
+++ b/llvm/test/CodeGen/AMDGPU/operand-folding.ll
@@ -1,8 +1,8 @@
-; RUN: llc -march=amdgcn -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s
; CHECK-LABEL: {{^}}fold_sgpr:
; CHECK: v_add_i32_e32 v{{[0-9]+}}, vcc, s
-define amdgpu_kernel void @fold_sgpr(i32 addrspace(1)* %out, i32 %fold) {
+define amdgpu_kernel void @fold_sgpr(i32 addrspace(1)* %out, i32 %fold) #1 {
entry:
%tmp0 = icmp ne i32 %fold, 0
br i1 %tmp0, label %if, label %endif
@@ -20,7 +20,7 @@ endif:
; CHECK-LABEL: {{^}}fold_imm:
; CHECK: v_or_b32_e32 v{{[0-9]+}}, 5
-define amdgpu_kernel void @fold_imm(i32 addrspace(1)* %out, i32 %cmp) {
+define amdgpu_kernel void @fold_imm(i32 addrspace(1)* %out, i32 %cmp) #1 {
entry:
%fold = add i32 3, 2
%tmp0 = icmp ne i32 %cmp, 0
@@ -46,7 +46,7 @@ endif:
; CHECK-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], [[HI]]
; CHECK: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}},
-define amdgpu_kernel void @fold_64bit_constant_add(i64 addrspace(1)* %out, i32 %cmp, i64 %val) {
+define amdgpu_kernel void @fold_64bit_constant_add(i64 addrspace(1)* %out, i32 %cmp, i64 %val) #1 {
entry:
%tmp0 = add i64 %val, 1
store i64 %tmp0, i64 addrspace(1)* %out
@@ -61,7 +61,7 @@ entry:
; CHECK: v_xor_b32_e32 v{{[0-9]+}}, 5, v{{[0-9]+}}
; CHECK: v_xor_b32_e32 v{{[0-9]+}}, 5, v{{[0-9]+}}
-define amdgpu_kernel void @vector_inline(<4 x i32> addrspace(1)* %out) {
+define amdgpu_kernel void @vector_inline(<4 x i32> addrspace(1)* %out) #1 {
entry:
%tmp0 = call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = add i32 %tmp0, 1
@@ -80,7 +80,7 @@ entry:
; CHECK-LABEL: {{^}}imm_one_use:
; CHECK: v_xor_b32_e32 v{{[0-9]+}}, 0x64, v{{[0-9]+}}
-define amdgpu_kernel void @imm_one_use(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @imm_one_use(i32 addrspace(1)* %out) #1 {
entry:
%tmp0 = call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = xor i32 %tmp0, 100
@@ -94,7 +94,7 @@ entry:
; CHECK: v_xor_b32_e32 v{{[0-9]}}, [[IMM]], v{{[0-9]}}
; CHECK: v_xor_b32_e32 v{{[0-9]}}, [[IMM]], v{{[0-9]}}
-define amdgpu_kernel void @vector_imm(<4 x i32> addrspace(1)* %out) {
+define amdgpu_kernel void @vector_imm(<4 x i32> addrspace(1)* %out) #1 {
entry:
%tmp0 = call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = add i32 %tmp0, 1
@@ -114,7 +114,7 @@ entry:
; CHECK: buffer_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
; CHECK: v_mac_f32_e32 v[[LO]], 0x41200000, v[[HI]]
; CHECK: buffer_store_dword v[[LO]]
-define amdgpu_kernel void @no_fold_tied_subregister() {
+define amdgpu_kernel void @no_fold_tied_subregister() #1 {
%tmp1 = load volatile <2 x float>, <2 x float> addrspace(1)* undef
%tmp2 = extractelement <2 x float> %tmp1, i32 0
%tmp3 = extractelement <2 x float> %tmp1, i32 1
@@ -128,7 +128,7 @@ define amdgpu_kernel void @no_fold_tied_subregister() {
; CHECK-LABEL: {{^}}no_extra_fold_on_same_opnd
; CHECK: v_xor_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
; CHECK: v_xor_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define void @no_extra_fold_on_same_opnd() {
+define void @no_extra_fold_on_same_opnd() #1 {
entry:
%s0 = load i32, i32 addrspace(5)* undef, align 4
%s0.i64= zext i32 %s0 to i64
@@ -151,3 +151,4 @@ if.else:
declare i32 @llvm.amdgcn.workitem.id.x() #0
attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
diff --git a/llvm/test/CodeGen/AMDGPU/rcp-pattern.ll b/llvm/test/CodeGen/AMDGPU/rcp-pattern.ll
index 1b9264b4e43d..4702cc7000a6 100644
--- a/llvm/test/CodeGen/AMDGPU/rcp-pattern.ll
+++ b/llvm/test/CodeGen/AMDGPU/rcp-pattern.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=amdgcn -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
@@ -150,8 +150,8 @@ define amdgpu_kernel void @div_arcp_neg_k_x_pat_f32(float addrspace(1)* %out) #0
declare float @llvm.fabs.f32(float) #1
declare float @llvm.sqrt.f32(float) #1
-attributes #0 = { nounwind "unsafe-fp-math"="false" }
+attributes #0 = { nounwind "unsafe-fp-math"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind "unsafe-fp-math"="true" }
+attributes #2 = { nounwind "unsafe-fp-math"="true" "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
!0 = !{float 2.500000e+00}
diff --git a/llvm/test/CodeGen/AMDGPU/rcp_iflag.ll b/llvm/test/CodeGen/AMDGPU/rcp_iflag.ll
index 1dabc37a2c70..b95efaf323ed 100644
--- a/llvm/test/CodeGen/AMDGPU/rcp_iflag.ll
+++ b/llvm/test/CodeGen/AMDGPU/rcp_iflag.ll
@@ -1,8 +1,8 @@
-; RUN: llc -march=amdgcn -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s
; GCN-LABEL: {{^}}rcp_uint:
; GCN: v_rcp_iflag_f32_e32
-define amdgpu_kernel void @rcp_uint(i32 addrspace(1)* %in, float addrspace(1)* %out) {
+define amdgpu_kernel void @rcp_uint(i32 addrspace(1)* %in, float addrspace(1)* %out) #0 {
%load = load i32, i32 addrspace(1)* %in, align 4
%cvt = uitofp i32 %load to float
%div = fdiv float 1.000000e+00, %cvt, !fpmath !0
@@ -12,7 +12,7 @@ define amdgpu_kernel void @rcp_uint(i32 addrspace(1)* %in, float addrspace(1)* %
; GCN-LABEL: {{^}}rcp_sint:
; GCN: v_rcp_iflag_f32_e32
-define amdgpu_kernel void @rcp_sint(i32 addrspace(1)* %in, float addrspace(1)* %out) {
+define amdgpu_kernel void @rcp_sint(i32 addrspace(1)* %in, float addrspace(1)* %out) #0 {
%load = load i32, i32 addrspace(1)* %in, align 4
%cvt = sitofp i32 %load to float
%div = fdiv float 1.000000e+00, %cvt, !fpmath !0
@@ -20,4 +20,27 @@ define amdgpu_kernel void @rcp_sint(i32 addrspace(1)* %in, float addrspace(1)* %
ret void
}
+; GCN-LABEL: {{^}}rcp_uint_denorm:
+; GCN-NOT: v_rcp_iflag_f32
+define amdgpu_kernel void @rcp_uint_denorm(i32 addrspace(1)* %in, float addrspace(1)* %out) #1 {
+ %load = load i32, i32 addrspace(1)* %in, align 4
+ %cvt = uitofp i32 %load to float
+ %div = fdiv float 1.000000e+00, %cvt
+ store float %div, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}rcp_sint_denorm:
+; GCN-NOT: v_rcp_iflag_f32
+define amdgpu_kernel void @rcp_sint_denorm(i32 addrspace(1)* %in, float addrspace(1)* %out) #1 {
+ %load = load i32, i32 addrspace(1)* %in, align 4
+ %cvt = sitofp i32 %load to float
+ %div = fdiv float 1.000000e+00, %cvt
+ store float %div, float addrspace(1)* %out, align 4
+ ret void
+}
+
!0 = !{float 2.500000e+00}
+
+attributes #0 = { "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
+attributes #1 = { "denormal-fp-math-f32"="ieee,ieee" }
diff --git a/llvm/test/CodeGen/AMDGPU/rsq.ll b/llvm/test/CodeGen/AMDGPU/rsq.ll
index 8480f344601b..4dd5b5517074 100644
--- a/llvm/test/CodeGen/AMDGPU/rsq.ll
+++ b/llvm/test/CodeGen/AMDGPU/rsq.ll
@@ -1,5 +1,5 @@
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mattr=-fp32-denormals -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI-UNSAFE -check-prefix=SI %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI-UNSAFE -check-prefix=SI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI %s
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
declare float @llvm.sqrt.f32(float) nounwind readnone
@@ -8,7 +8,7 @@ declare double @llvm.sqrt.f64(double) nounwind readnone
; SI-LABEL: {{^}}rsq_f32:
; SI: v_rsq_f32_e32
; SI: s_endpgm
-define amdgpu_kernel void @rsq_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
+define amdgpu_kernel void @rsq_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #0 {
%val = load float, float addrspace(1)* %in, align 4
%sqrt = call float @llvm.sqrt.f32(float %val) nounwind readnone
%div = fdiv float 1.0, %sqrt, !fpmath !0
@@ -20,7 +20,7 @@ define amdgpu_kernel void @rsq_f32(float addrspace(1)* noalias %out, float addrs
; SI-UNSAFE: v_rsq_f64_e32
; SI-SAFE: v_sqrt_f64_e32
; SI: s_endpgm
-define amdgpu_kernel void @rsq_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) nounwind {
+define amdgpu_kernel void @rsq_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #0 {
%val = load double, double addrspace(1)* %in, align 4
%sqrt = call double @llvm.sqrt.f64(double %val) nounwind readnone
%div = fdiv double 1.0, %sqrt
@@ -31,7 +31,7 @@ define amdgpu_kernel void @rsq_f64(double addrspace(1)* noalias %out, double add
; SI-LABEL: {{^}}rsq_f32_sgpr:
; SI: v_rsq_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}
; SI: s_endpgm
-define amdgpu_kernel void @rsq_f32_sgpr(float addrspace(1)* noalias %out, float %val) nounwind {
+define amdgpu_kernel void @rsq_f32_sgpr(float addrspace(1)* noalias %out, float %val) #0 {
%sqrt = call float @llvm.sqrt.f32(float %val) nounwind readnone
%div = fdiv float 1.0, %sqrt, !fpmath !0
store float %div, float addrspace(1)* %out, align 4
@@ -57,7 +57,7 @@ define amdgpu_kernel void @rsq_f32_sgpr(float addrspace(1)* noalias %out, float
; SI-SAFE-NOT: v_rsq_f32
; SI: s_endpgm
-define amdgpu_kernel void @rsqrt_fmul(float addrspace(1)* %out, float addrspace(1)* %in) {
+define amdgpu_kernel void @rsqrt_fmul(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
@@ -83,7 +83,7 @@ define amdgpu_kernel void @rsqrt_fmul(float addrspace(1)* %out, float addrspace(
; SI-UNSAFE: v_sqrt_f32_e32 [[SQRT:v[0-9]+]], v{{[0-9]+}}
; SI-UNSAFE: v_rcp_f32_e64 [[RSQ:v[0-9]+]], -[[SQRT]]
; SI-UNSAFE: buffer_store_dword [[RSQ]]
-define amdgpu_kernel void @neg_rsq_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
+define amdgpu_kernel void @neg_rsq_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #0 {
%val = load float, float addrspace(1)* %in, align 4
%sqrt = call float @llvm.sqrt.f32(float %val)
%div = fdiv float -1.0, %sqrt, !fpmath !0
@@ -98,7 +98,7 @@ define amdgpu_kernel void @neg_rsq_f32(float addrspace(1)* noalias %out, float a
; SI-UNSAFE: v_sqrt_f64_e32 [[SQRT:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}
; SI-UNSAFE: v_rcp_f64_e64 [[RCP:v\[[0-9]+:[0-9]+\]]], -[[SQRT]]
; SI-UNSAFE: buffer_store_dwordx2 [[RCP]]
-define amdgpu_kernel void @neg_rsq_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) nounwind {
+define amdgpu_kernel void @neg_rsq_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #0 {
%val = load double, double addrspace(1)* %in, align 4
%sqrt = call double @llvm.sqrt.f64(double %val)
%div = fdiv double -1.0, %sqrt
@@ -114,7 +114,7 @@ define amdgpu_kernel void @neg_rsq_f64(double addrspace(1)* noalias %out, double
; SI-UNSAFE: v_sqrt_f32_e64 [[SQRT:v[0-9]+]], -v{{[0-9]+}}
; SI-UNSAFE: v_rcp_f32_e64 [[RSQ:v[0-9]+]], -[[SQRT]]
; SI-UNSAFE: buffer_store_dword [[RSQ]]
-define amdgpu_kernel void @neg_rsq_neg_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
+define amdgpu_kernel void @neg_rsq_neg_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #0 {
%val = load float, float addrspace(1)* %in, align 4
%val.fneg = fsub float -0.0, %val
%sqrt = call float @llvm.sqrt.f32(float %val.fneg)
@@ -130,7 +130,7 @@ define amdgpu_kernel void @neg_rsq_neg_f32(float addrspace(1)* noalias %out, flo
; SI-UNSAFE: v_sqrt_f64_e64 [[SQRT:v\[[0-9]+:[0-9]+\]]], -v{{\[[0-9]+:[0-9]+\]}}
; SI-UNSAFE: v_rcp_f64_e64 [[RCP:v\[[0-9]+:[0-9]+\]]], -[[SQRT]]
; SI-UNSAFE: buffer_store_dwordx2 [[RCP]]
-define amdgpu_kernel void @neg_rsq_neg_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) nounwind {
+define amdgpu_kernel void @neg_rsq_neg_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #0 {
%val = load double, double addrspace(1)* %in, align 4
%val.fneg = fsub double -0.0, %val
%sqrt = call double @llvm.sqrt.f64(double %val.fneg)
@@ -140,3 +140,5 @@ define amdgpu_kernel void @neg_rsq_neg_f64(double addrspace(1)* noalias %out, do
}
!0 = !{float 2.500000e+00}
+
+attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
diff --git a/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir b/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir
index b954b778dc65..9d3144196eb1 100644
--- a/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir
+++ b/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir
@@ -149,7 +149,7 @@
attributes #0 = { argmemonly nounwind }
attributes #1 = { nounwind readnone speculatable }
- attributes #2 = { convergent nounwind "amdgpu-dispatch-ptr" "amdgpu-flat-scratch" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "target-cpu"="gfx900" "target-features"="+fp32-denormals" }
+ attributes #2 = { convergent nounwind "amdgpu-dispatch-ptr" "amdgpu-flat-scratch" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "target-cpu"="gfx900" }
attributes #3 = { nounwind }
!llvm.dbg.cu = !{!0}
diff --git a/llvm/test/CodeGen/AMDGPU/sdwa-peephole.ll b/llvm/test/CodeGen/AMDGPU/sdwa-peephole.ll
index 97d5bcce6190..ae836e447cc5 100644
--- a/llvm/test/CodeGen/AMDGPU/sdwa-peephole.ll
+++ b/llvm/test/CodeGen/AMDGPU/sdwa-peephole.ll
@@ -1,7 +1,7 @@
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -amdgpu-sdwa-peephole=0 -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=NOSDWA,GCN %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -amdgpu-sdwa-peephole -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI,GFX89,SDWA,GCN %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -amdgpu-sdwa-peephole -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9,GFX9_10,SDWA,GCN %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx1010 -amdgpu-sdwa-peephole -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX10,GFX9_10,SDWA,GCN %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -amdgpu-sdwa-peephole=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=NOSDWA,GCN %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -amdgpu-sdwa-peephole -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI,GFX89,SDWA,GCN %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -amdgpu-sdwa-peephole -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9,GFX9_10,SDWA,GCN %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx1010 -amdgpu-sdwa-peephole -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX10,GFX9_10,SDWA,GCN %s
; GCN-LABEL: {{^}}add_shr_i32:
; NOSDWA: v_lshrrev_b32_e32 v[[DST:[0-9]+]], 16, v{{[0-9]+}}
@@ -12,7 +12,7 @@
; GFX9: v_add_u32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX10: v_add_nc_u32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-define amdgpu_kernel void @add_shr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+define amdgpu_kernel void @add_shr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
%a = load i32, i32 addrspace(1)* %in, align 4
%shr = lshr i32 %a, 16
%add = add i32 %a, %shr
@@ -28,7 +28,7 @@ define amdgpu_kernel void @add_shr_i32(i32 addrspace(1)* %out, i32 addrspace(1)*
; VI: v_subrev_u32_sdwa v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX9: v_sub_u32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX10: v_sub_nc_u32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-define amdgpu_kernel void @sub_shr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+define amdgpu_kernel void @sub_shr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
%a = load i32, i32 addrspace(1)* %in, align 4
%shr = lshr i32 %a, 16
%sub = sub i32 %shr, %a
@@ -44,7 +44,7 @@ define amdgpu_kernel void @sub_shr_i32(i32 addrspace(1)* %out, i32 addrspace(1)*
; SDWA: v_mul_u32_u24_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-define amdgpu_kernel void @mul_shr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in1, i32 addrspace(1)* %in2) {
+define amdgpu_kernel void @mul_shr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in1, i32 addrspace(1)* %in2) #0 {
%a = load i32, i32 addrspace(1)* %in1, align 4
%b = load i32, i32 addrspace(1)* %in2, align 4
%shra = lshr i32 %a, 16
@@ -61,7 +61,7 @@ define amdgpu_kernel void @mul_shr_i32(i32 addrspace(1)* %out, i32 addrspace(1)*
; GFX10: v_mul_lo_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; SDWA-NOT: v_mul_u32_u24_sdwa
-define amdgpu_kernel void @mul_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %ina, i16 addrspace(1)* %inb) {
+define amdgpu_kernel void @mul_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %ina, i16 addrspace(1)* %inb) #0 {
entry:
%a = load i16, i16 addrspace(1)* %ina, align 4
%b = load i16, i16 addrspace(1)* %inb, align 4
@@ -84,7 +84,7 @@ entry:
; GFX9_10: v_pk_mul_lo_u16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @mul_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %ina, <2 x i16> addrspace(1)* %inb) {
+define amdgpu_kernel void @mul_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %ina, <2 x i16> addrspace(1)* %inb) #0 {
entry:
%a = load <2 x i16>, <2 x i16> addrspace(1)* %ina, align 4
%b = load <2 x i16>, <2 x i16> addrspace(1)* %inb, align 4
@@ -111,7 +111,7 @@ entry:
; GFX9_10-DAG: v_pk_mul_lo_u16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GFX9_10-DAG: v_pk_mul_lo_u16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @mul_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %ina, <4 x i16> addrspace(1)* %inb) {
+define amdgpu_kernel void @mul_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %ina, <4 x i16> addrspace(1)* %inb) #0 {
entry:
%a = load <4 x i16>, <4 x i16> addrspace(1)* %ina, align 4
%b = load <4 x i16>, <4 x i16> addrspace(1)* %inb, align 4
@@ -146,7 +146,7 @@ entry:
; GFX9_10-DAG: v_pk_mul_lo_u16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GFX9_10-DAG: v_pk_mul_lo_u16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @mul_v8i16(<8 x i16> addrspace(1)* %out, <8 x i16> addrspace(1)* %ina, <8 x i16> addrspace(1)* %inb) {
+define amdgpu_kernel void @mul_v8i16(<8 x i16> addrspace(1)* %out, <8 x i16> addrspace(1)* %ina, <8 x i16> addrspace(1)* %inb) #0 {
entry:
%a = load <8 x i16>, <8 x i16> addrspace(1)* %ina, align 4
%b = load <8 x i16>, <8 x i16> addrspace(1)* %inb, align 4
@@ -161,7 +161,7 @@ entry:
; SDWA: v_mul_f16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; SDWA-NOT: v_mul_f16_sdwa
-define amdgpu_kernel void @mul_half(half addrspace(1)* %out, half addrspace(1)* %ina, half addrspace(1)* %inb) {
+define amdgpu_kernel void @mul_half(half addrspace(1)* %out, half addrspace(1)* %ina, half addrspace(1)* %inb) #0 {
entry:
%a = load half, half addrspace(1)* %ina, align 4
%b = load half, half addrspace(1)* %inb, align 4
@@ -184,7 +184,7 @@ entry:
; GFX9_10: v_pk_mul_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @mul_v2half(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %ina, <2 x half> addrspace(1)* %inb) {
+define amdgpu_kernel void @mul_v2half(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %ina, <2 x half> addrspace(1)* %inb) #0 {
entry:
%a = load <2 x half>, <2 x half> addrspace(1)* %ina, align 4
%b = load <2 x half>, <2 x half> addrspace(1)* %inb, align 4
@@ -209,7 +209,7 @@ entry:
; GFX9_10-DAG: v_pk_mul_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GFX9_10-DAG: v_pk_mul_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @mul_v4half(<4 x half> addrspace(1)* %out, <4 x half> addrspace(1)* %ina, <4 x half> addrspace(1)* %inb) {
+define amdgpu_kernel void @mul_v4half(<4 x half> addrspace(1)* %out, <4 x half> addrspace(1)* %ina, <4 x half> addrspace(1)* %inb) #0 {
entry:
%a = load <4 x half>, <4 x half> addrspace(1)* %ina, align 4
%b = load <4 x half>, <4 x half> addrspace(1)* %inb, align 4
@@ -240,7 +240,7 @@ entry:
; GFX9_10-DAG: v_pk_mul_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GFX9_10-DAG: v_pk_mul_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @mul_v8half(<8 x half> addrspace(1)* %out, <8 x half> addrspace(1)* %ina, <8 x half> addrspace(1)* %inb) {
+define amdgpu_kernel void @mul_v8half(<8 x half> addrspace(1)* %out, <8 x half> addrspace(1)* %ina, <8 x half> addrspace(1)* %inb) #0 {
entry:
%a = load <8 x half>, <8 x half> addrspace(1)* %ina, align 4
%b = load <8 x half>, <8 x half> addrspace(1)* %inb, align 4
@@ -256,7 +256,7 @@ entry:
; GFX10: v_mul_lo_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; SDWA-NOT: v_mul_u32_u24_sdwa
-define amdgpu_kernel void @mul_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %ina, i8 addrspace(1)* %inb) {
+define amdgpu_kernel void @mul_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %ina, i8 addrspace(1)* %inb) #0 {
entry:
%a = load i8, i8 addrspace(1)* %ina, align 4
%b = load i8, i8 addrspace(1)* %inb, align 4
@@ -285,7 +285,7 @@ entry:
; GFX10: v_lshlrev_b16_e64 v{{[0-9]+}}, 8, v
; GFX10: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-define amdgpu_kernel void @mul_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(1)* %ina, <2 x i8> addrspace(1)* %inb) {
+define amdgpu_kernel void @mul_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(1)* %ina, <2 x i8> addrspace(1)* %inb) #0 {
entry:
%a = load <2 x i8>, <2 x i8> addrspace(1)* %ina, align 4
%b = load <2 x i8>, <2 x i8> addrspace(1)* %inb, align 4
@@ -315,7 +315,7 @@ entry:
; GFX10-DAG: v_mul_lo_u16_e64
; GFX10-DAG: v_mul_lo_u16_e64
-define amdgpu_kernel void @mul_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)* %ina, <4 x i8> addrspace(1)* %inb) {
+define amdgpu_kernel void @mul_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)* %ina, <4 x i8> addrspace(1)* %inb) #0 {
entry:
%a = load <4 x i8>, <4 x i8> addrspace(1)* %ina, align 4
%b = load <4 x i8>, <4 x i8> addrspace(1)* %inb, align 4
@@ -355,7 +355,7 @@ entry:
; GFX10-DAG: v_mul_lo_u16_e64
; GFX10-DAG: v_mul_lo_u16_e64
-define amdgpu_kernel void @mul_v8i8(<8 x i8> addrspace(1)* %out, <8 x i8> addrspace(1)* %ina, <8 x i8> addrspace(1)* %inb) {
+define amdgpu_kernel void @mul_v8i8(<8 x i8> addrspace(1)* %out, <8 x i8> addrspace(1)* %ina, <8 x i8> addrspace(1)* %inb) #0 {
entry:
%a = load <8 x i8>, <8 x i8> addrspace(1)* %ina, align 4
%b = load <8 x i8>, <8 x i8> addrspace(1)* %inb, align 4
@@ -376,7 +376,7 @@ entry:
; FIXME: Should be able to avoid or
define amdgpu_kernel void @sitofp_v2i16_to_v2f16(
<2 x half> addrspace(1)* %r,
- <2 x i16> addrspace(1)* %a) {
+ <2 x i16> addrspace(1)* %a) #0 {
entry:
%a.val = load <2 x i16>, <2 x i16> addrspace(1)* %a
%r.val = sitofp <2 x i16> %a.val to <2 x half>
@@ -399,7 +399,7 @@ entry:
; GFX9_10: v_pk_mul_f16 v[[DST_MUL:[0-9]+]], v{{[0-9]+}}, v[[SRC:[0-9]+]]
; GFX9_10: v_pk_add_f16 v{{[0-9]+}}, v[[DST_MUL]], v[[SRC]]
-define amdgpu_kernel void @mac_v2half(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %ina, <2 x half> addrspace(1)* %inb) {
+define amdgpu_kernel void @mac_v2half(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %ina, <2 x half> addrspace(1)* %inb) #0 {
entry:
%a = load <2 x half>, <2 x half> addrspace(1)* %ina, align 4
%b = load <2 x half>, <2 x half> addrspace(1)* %inb, align 4
@@ -421,7 +421,7 @@ entry:
; GFX10: v_pk_mul_lo_u16 v{{[0-9]+}}, 0x141007b, v{{[0-9]+}}
-define amdgpu_kernel void @immediate_mul_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
+define amdgpu_kernel void @immediate_mul_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
entry:
%a = load <2 x i16>, <2 x i16> addrspace(1)* %in, align 4
%mul = mul <2 x i16> %a, <i16 123, i16 321>
@@ -443,7 +443,7 @@ entry:
; GFX9_10: v_pk_mul_lo_u16 v[[DST1:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}
; GFX9_10: v_pk_mul_lo_u16 v{{[0-9]+}}, v[[DST1]], v{{[0-9]+}}
-define amdgpu_kernel void @mulmul_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %ina, <2 x i16> addrspace(1)* %inb) {
+define amdgpu_kernel void @mulmul_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %ina, <2 x i16> addrspace(1)* %inb) #0 {
entry:
%a = load <2 x i16>, <2 x i16> addrspace(1)* %ina, align 4
%b = load <2 x i16>, <2 x i16> addrspace(1)* %inb, align 4
@@ -460,7 +460,7 @@ entry:
; GFX9_10: v_pk_add_u16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @add_bb_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %ina, <2 x i16> addrspace(1)* %inb) {
+define amdgpu_kernel void @add_bb_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %ina, <2 x i16> addrspace(1)* %inb) #0 {
entry:
%a = load <2 x i16>, <2 x i16> addrspace(1)* %ina, align 4
%b = load <2 x i16>, <2 x i16> addrspace(1)* %inb, align 4
@@ -503,7 +503,7 @@ store_label:
; GFX10: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX10: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-define amdgpu_kernel void @pulled_out_test(<8 x i8> addrspace(1)* %sourceA, <8 x i8> addrspace(1)* %destValues) {
+define amdgpu_kernel void @pulled_out_test(<8 x i8> addrspace(1)* %sourceA, <8 x i8> addrspace(1)* %destValues) #0 {
entry:
%idxprom = ashr exact i64 15, 32
%arrayidx = getelementptr inbounds <8 x i8>, <8 x i8> addrspace(1)* %sourceA, i64 %idxprom
@@ -564,3 +564,5 @@ bb11: ; preds = %bb10, %bb2
store volatile <2 x i32> %tmp12, <2 x i32> addrspace(1)* undef
br label %bb1
}
+
+attributes #0 = { "denormal-fp-math"="preserve-sign,preserve-sign" }
diff --git a/llvm/test/CodeGen/AMDGPU/udiv.ll b/llvm/test/CodeGen/AMDGPU/udiv.ll
index e9a14bbda1ac..d6c9791e7d92 100644
--- a/llvm/test/CodeGen/AMDGPU/udiv.ll
+++ b/llvm/test/CodeGen/AMDGPU/udiv.ll
@@ -1,7 +1,7 @@
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs -mattr=-fp32-denormals < %s | FileCheck -check-prefix=SI -check-prefix=FUNC -check-prefix=VI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefix=SI -check-prefix=FUNC -check-prefix=VI %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=+fp32-denormals < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=fiji -denormal-fp-math-f32=ieee < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
diff --git a/llvm/test/CodeGen/AMDGPU/udivrem24.ll b/llvm/test/CodeGen/AMDGPU/udivrem24.ll
index 6e37578ef252..ec15cc33be42 100644
--- a/llvm/test/CodeGen/AMDGPU/udivrem24.ll
+++ b/llvm/test/CodeGen/AMDGPU/udivrem24.ll
@@ -21,6 +21,63 @@ define amdgpu_kernel void @udiv24_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in
ret void
}
+; FUNC-LABEL: {{^}}udiv24_i8_denorm_flush_in_out:
+; SI: v_cvt_f32_ubyte
+; SI-DAG: v_cvt_f32_ubyte
+; SI-DAG: v_rcp_iflag_f32
+; SI: v_cvt_u32_f32
+
+; EG: UINT_TO_FLT
+; EG-DAG: UINT_TO_FLT
+; EG-DAG: RECIP_IEEE
+; EG: FLT_TO_UINT
+define amdgpu_kernel void @udiv24_i8_denorm_flush_in_out(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
+ %den_ptr = getelementptr i8, i8 addrspace(1)* %in, i8 1
+ %num = load i8, i8 addrspace(1) * %in
+ %den = load i8, i8 addrspace(1) * %den_ptr
+ %result = udiv i8 %num, %den
+ store i8 %result, i8 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}udiv24_i8_denorm_flush_in:
+; SI: v_cvt_f32_ubyte
+; SI-DAG: v_cvt_f32_ubyte
+; SI-DAG: v_rcp_iflag_f32
+; SI: v_cvt_u32_f32
+
+; EG: UINT_TO_FLT
+; EG-DAG: UINT_TO_FLT
+; EG-DAG: RECIP_IEEE
+; EG: FLT_TO_UINT
+define amdgpu_kernel void @udiv24_i8_denorm_flush_in(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #1 {
+ %den_ptr = getelementptr i8, i8 addrspace(1)* %in, i8 1
+ %num = load i8, i8 addrspace(1) * %in
+ %den = load i8, i8 addrspace(1) * %den_ptr
+ %result = udiv i8 %num, %den
+ store i8 %result, i8 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}udiv24_i8_denorm_flush_out:
+; SI: v_cvt_f32_ubyte
+; SI-DAG: v_cvt_f32_ubyte
+; SI-DAG: v_rcp_iflag_f32
+; SI: v_cvt_u32_f32
+
+; EG: UINT_TO_FLT
+; EG-DAG: UINT_TO_FLT
+; EG-DAG: RECIP_IEEE
+; EG: FLT_TO_UINT
+define amdgpu_kernel void @udiv24_i8_denorm_flush_out(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #2 {
+ %den_ptr = getelementptr i8, i8 addrspace(1)* %in, i8 1
+ %num = load i8, i8 addrspace(1) * %in
+ %den = load i8, i8 addrspace(1) * %den_ptr
+ %result = udiv i8 %num, %den
+ store i8 %result, i8 addrspace(1)* %out
+ ret void
+}
+
; FUNC-LABEL: {{^}}udiv24_i16:
; SI: v_cvt_f32_u32
; SI: v_cvt_f32_u32
@@ -325,3 +382,7 @@ define amdgpu_kernel void @test_udiv24_u23_u16_i32(i32 addrspace(1)* %out, i32 a
store i32 %result, i32 addrspace(1)* %out, align 4
ret void
}
+
+attributes #0 = { "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
+attributes #1 = { "denormal-fp-math-f32"="ieee,preserve-sign" }
+attributes #2 = { "denormal-fp-math-f32"="preserve-sign,ieee" }
diff --git a/llvm/test/CodeGen/AMDGPU/v_mac.ll b/llvm/test/CodeGen/AMDGPU/v_mac.ll
index cd15d88b46b3..03964acb26bc 100644
--- a/llvm/test/CodeGen/AMDGPU/v_mac.ll
+++ b/llvm/test/CodeGen/AMDGPU/v_mac.ll
@@ -1,6 +1,6 @@
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals,-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-FLUSH -check-prefix=GCN %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals,+fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-DENORM -check-prefix=GCN %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -denormal-fp-math=preserve-sign -denormal-fp-math-f32=preserve-sign -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-FLUSH -check-prefix=GCN %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -denormal-fp-math=ieee -denormal-fp-math-f32=preserve-sign -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-DENORM -check-prefix=GCN %s
; GCN-LABEL: {{^}}mac_vvv:
; GCN: buffer_load_dword [[A:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], 0{{$}}
diff --git a/llvm/test/CodeGen/AMDGPU/v_mac_f16.ll b/llvm/test/CodeGen/AMDGPU/v_mac_f16.ll
index 4ff0d1b44e21..e10d2d389c69 100644
--- a/llvm/test/CodeGen/AMDGPU/v_mac_f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/v_mac_f16.ll
@@ -1,5 +1,5 @@
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -mattr=-fp32-denormals,-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=SI %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-fp32-denormals,-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=VI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=SI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=VI %s
; GCN-LABEL: {{^}}mac_f16:
; GCN: {{buffer|flat}}_load_ushort v[[A_F16:[0-9]+]]
@@ -677,6 +677,6 @@ entry:
declare void @llvm.amdgcn.s.barrier() #2
-attributes #0 = { nounwind "no-signed-zeros-fp-math"="false" }
-attributes #1 = { nounwind "no-signed-zeros-fp-math"="true" }
+attributes #0 = { nounwind "no-signed-zeros-fp-math"="false" "denormal-fp-math"="preserve-sign,preserve-sign" }
+attributes #1 = { nounwind "no-signed-zeros-fp-math"="true" "denormal-fp-math"="preserve-sign,preserve-sign" }
attributes #2 = { nounwind convergent }
diff --git a/llvm/test/CodeGen/AMDGPU/v_madak_f16.ll b/llvm/test/CodeGen/AMDGPU/v_madak_f16.ll
index 3e5c8393b757..cf0d45603218 100644
--- a/llvm/test/CodeGen/AMDGPU/v_madak_f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/v_madak_f16.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=amdgcn-- -mcpu=tahiti -mattr=-fp32-denormals,-fp64-fp16-denormals -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,SI
-; RUN: llc < %s -mtriple=amdgcn-- -mcpu=fiji -mattr=-fp32-denormals,-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,VI
+; RUN: llc < %s -mtriple=amdgcn-- -mcpu=tahiti -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,SI
+; RUN: llc < %s -mtriple=amdgcn-- -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,VI
define amdgpu_kernel void @madak_f16(
; SI-LABEL: madak_f16:
@@ -52,7 +52,7 @@ define amdgpu_kernel void @madak_f16(
; VI-NEXT: s_endpgm
half addrspace(1)* %r,
half addrspace(1)* %a,
- half addrspace(1)* %b) {
+ half addrspace(1)* %b) #0 {
entry:
%a.val = load half, half addrspace(1)* %a
%b.val = load half, half addrspace(1)* %b
@@ -136,7 +136,7 @@ define amdgpu_kernel void @madak_f16_use_2(
half addrspace(1)* %r1,
half addrspace(1)* %a,
half addrspace(1)* %b,
- half addrspace(1)* %c) {
+ half addrspace(1)* %c) #0 {
entry:
%a.val = load volatile half, half addrspace(1)* %a
%b.val = load volatile half, half addrspace(1)* %b
@@ -151,3 +151,5 @@ entry:
store half %r1.val, half addrspace(1)* %r1
ret void
}
+
+attributes #0 = { "denormal-fp-math"="preserve-sign,preserve-sign" }
diff --git a/llvm/test/Transforms/Inline/AMDGPU/inline-target-cpu.ll b/llvm/test/Transforms/Inline/AMDGPU/inline-target-cpu.ll
index 87330c72d442..961933bdd6ef 100644
--- a/llvm/test/Transforms/Inline/AMDGPU/inline-target-cpu.ll
+++ b/llvm/test/Transforms/Inline/AMDGPU/inline-target-cpu.ll
@@ -15,89 +15,74 @@ define i32 @target_cpu_call_no_target_cpu() #1 {
; CHECK-LABEL: @target_cpu_target_features_call_no_target_cpu(
; CHECK-NEXT: ret i32 0
-define i32 @target_cpu_target_features_call_no_target_cpu() #2 {
+define i32 @target_cpu_target_features_call_no_target_cpu() {
%call = call i32 @func_no_target_cpu()
ret i32 %call
}
-; CHECK-LABEL: @fp32_denormals(
-define i32 @fp32_denormals() #3 {
- ret i32 0
-}
-
-; CHECK-LABEL: @no_fp32_denormals_call_f32_denormals(
-; CHECK-NEXT: call i32 @fp32_denormals()
-define i32 @no_fp32_denormals_call_f32_denormals() #4 {
- %call = call i32 @fp32_denormals()
- ret i32 %call
-}
-
; Make sure gfx9 can call unspecified functions because of movrel
; feature change.
; CHECK-LABEL: @gfx9_target_features_call_no_target_cpu(
; CHECK-NEXT: ret i32 0
-define i32 @gfx9_target_features_call_no_target_cpu() #5 {
+define i32 @gfx9_target_features_call_no_target_cpu() #2 {
%call = call i32 @func_no_target_cpu()
ret i32 %call
}
-define i32 @func_no_halfrate64ops() #6 {
+define i32 @func_no_halfrate64ops() #3 {
ret i32 0
}
-define i32 @func_with_halfrate64ops() #7 {
+define i32 @func_with_halfrate64ops() #4 {
ret i32 0
}
; CHECK-LABEL: @call_func_without_halfrate64ops(
; CHECK-NEXT: ret i32 0
-define i32 @call_func_without_halfrate64ops() #7 {
+define i32 @call_func_without_halfrate64ops() #4 {
%call = call i32 @func_no_halfrate64ops()
ret i32 %call
}
; CHECK-LABEL: @call_func_with_halfrate64ops(
; CHECK-NEXT: ret i32 0
-define i32 @call_func_with_halfrate64ops() #6 {
+define i32 @call_func_with_halfrate64ops() #3 {
%call = call i32 @func_with_halfrate64ops()
ret i32 %call
}
-define i32 @func_no_loadstoreopt() #8 {
+define i32 @func_no_loadstoreopt() #5 {
ret i32 0
}
-define i32 @func_with_loadstoreopt() #9 {
+define i32 @func_with_loadstoreopt() #6 {
ret i32 0
}
; CHECK-LABEL: @call_func_without_loadstoreopt(
; CHECK-NEXT: ret i32 0
-define i32 @call_func_without_loadstoreopt() #9 {
+define i32 @call_func_without_loadstoreopt() #6 {
%call = call i32 @func_no_loadstoreopt()
ret i32 %call
}
-define i32 @enable_codeobjectv3() #10 {
+define i32 @enable_codeobjectv3() #7 {
ret i32 999
}
; CHECK-LABEL: @disable_codeobjectv3_call_codeobjectv3(
; CHECK-NEXT: ret i32 999
-define i32 @disable_codeobjectv3_call_codeobjectv3() #11 {
+define i32 @disable_codeobjectv3_call_codeobjectv3() #8 {
%call = call i32 @enable_codeobjectv3()
ret i32 %call
}
attributes #0 = { nounwind }
attributes #1 = { nounwind "target-cpu"="fiji" }
-attributes #2 = { nounwind "target-cpu"="fiji" "target-features"="+fp32-denormals" }
-attributes #3 = { nounwind "target-features"="+fp32-denormals" }
-attributes #4 = { nounwind "target-features"="-fp32-denormals" }
-attributes #5 = { nounwind "target-cpu"="gfx900" }
-attributes #6 = { nounwind "target-features"="-half-rate-64-ops" }
-attributes #7 = { nounwind "target-features"="+half-rate-64-ops" }
-attributes #8 = { nounwind "target-features"="-load-store-opt" }
-attributes #9 = { nounwind "target-features"="+load-store-opt" }
-attributes #10 = { nounwind "target-features"="+code-object-v3" }
-attributes #11 = { nounwind "target-features"="-code-object-v3" }
+attributes #2 = { nounwind "target-cpu"="gfx900" }
+attributes #3 = { nounwind "target-features"="-half-rate-64-ops" }
+attributes #4 = { nounwind "target-features"="+half-rate-64-ops" }
+attributes #5 = { nounwind "target-features"="-load-store-opt" }
+attributes #6 = { nounwind "target-features"="+load-store-opt" }
+attributes #7 = { nounwind "target-features"="+code-object-v3" }
+attributes #8 = { nounwind "target-features"="-code-object-v3" }
diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/source-lines.ll b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/source-lines.ll
index 6258d48748e2..7b8a53a2626a 100644
--- a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/source-lines.ll
+++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/source-lines.ll
@@ -68,7 +68,7 @@ entry:
; Function Attrs: nounwind readnone
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
-attributes #0 = { noinline nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+fp64-fp16-denormals,-fp32-denormals" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
!llvm.dbg.cu = !{!0}
More information about the llvm-commits mailing list