[llvm] r292837 - AMDGPU: Combine fp16/fp64 subtarget features
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 23 14:31:04 PST 2017
Author: arsenm
Date: Mon Jan 23 16:31:03 2017
New Revision: 292837
URL: http://llvm.org/viewvc/llvm-project?rev=292837&view=rev
Log:
AMDGPU: Combine fp16/fp64 subtarget features
The same control register controls both, and are set to
the same defaults. Keep the old names around as aliases.
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPU.td
llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
llvm/trunk/test/CodeGen/AMDGPU/default-fp-mode.ll
llvm/trunk/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
llvm/trunk/test/CodeGen/AMDGPU/fcanonicalize.ll
llvm/trunk/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll
llvm/trunk/test/CodeGen/AMDGPU/fmuladd.f16.ll
llvm/trunk/test/CodeGen/AMDGPU/hsa-fp-mode.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll
llvm/trunk/test/CodeGen/AMDGPU/v_mac.ll
llvm/trunk/test/CodeGen/AMDGPU/v_mac_f16.ll
llvm/trunk/test/CodeGen/AMDGPU/v_madak_f16.ll
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPU.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPU.td?rev=292837&r1=292836&r2=292837&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPU.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPU.td Mon Jan 23 16:31:03 2017
@@ -206,12 +206,6 @@ def FeatureDPP : SubtargetFeature<"dpp",
// Subtarget Features (options and debugging)
//===------------------------------------------------------------===//
-def FeatureFP16Denormals : SubtargetFeature<"fp16-denormals",
- "FP16Denormals",
- "true",
- "Enable half precision denormal handling"
->;
-
// Some instructions do not support denormals despite this flag. Using
// fp32 denormals also causes instructions to run at the double
// precision rate for the device.
@@ -221,13 +215,30 @@ def FeatureFP32Denormals : SubtargetFeat
"Enable single precision denormal handling"
>;
-def FeatureFP64Denormals : SubtargetFeature<"fp64-denormals",
- "FP64Denormals",
+// Denormal handling for fp64 and fp16 is controlled by the same
+// config register when fp16 supported.
+// TODO: Do we need a separate f16 setting when not legal?
+def FeatureFP64FP16Denormals : SubtargetFeature<"fp64-fp16-denormals",
+ "FP64FP16Denormals",
"true",
- "Enable double precision denormal handling",
+ "Enable double and half precision denormal handling",
[FeatureFP64]
>;
+def FeatureFP64Denormals : SubtargetFeature<"fp64-denormals",
+ "FP64FP16Denormals",
+ "true",
+ "Enable double and half precision denormal handling",
+ [FeatureFP64, FeatureFP64FP16Denormals]
+>;
+
+def FeatureFP16Denormals : SubtargetFeature<"fp16-denormals",
+ "FP64FP16Denormals",
+ "true",
+ "Enable half precision denormal handling",
+ [FeatureFP64FP16Denormals]
+>;
+
def FeatureFPExceptions : SubtargetFeature<"fp-exceptions",
"FPExceptions",
"true",
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp?rev=292837&r1=292836&r2=292837&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp Mon Jan 23 16:31:03 2017
@@ -41,9 +41,10 @@ AMDGPUSubtarget::initializeSubtargetDepe
// for SI has the unhelpful behavior that it unsets everything else if you
// disable it.
- SmallString<256> FullFS("+promote-alloca,+fp64-denormals,+load-store-opt,");
+ SmallString<256> FullFS("+promote-alloca,+fp64-fp16-denormals,+load-store-opt,");
if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA.
FullFS += "+flat-for-global,+unaligned-buffer-access,";
+
FullFS += FS;
ParseSubtargetFeatures(GPU, FullFS);
@@ -52,9 +53,8 @@ AMDGPUSubtarget::initializeSubtargetDepe
// denormals, but should be checked. Should we issue a warning somewhere
// if someone tries to enable these?
if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
- FP16Denormals = false;
+ FP64FP16Denormals = false;
FP32Denormals = false;
- FP64Denormals = false;
}
// Set defaults if needed.
@@ -78,9 +78,8 @@ AMDGPUSubtarget::AMDGPUSubtarget(const T
FastFMAF32(false),
HalfRate64Ops(false),
- FP16Denormals(false),
FP32Denormals(false),
- FP64Denormals(false),
+ FP64FP16Denormals(false),
FPExceptions(false),
FlatForGlobal(false),
UnalignedScratchAccess(false),
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h?rev=292837&r1=292836&r2=292837&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h Mon Jan 23 16:31:03 2017
@@ -81,9 +81,8 @@ protected:
bool HalfRate64Ops;
// Dynamially set bits that enable features.
- bool FP16Denormals;
bool FP32Denormals;
- bool FP64Denormals;
+ bool FP64FP16Denormals;
bool FPExceptions;
bool FlatForGlobal;
bool UnalignedScratchAccess;
@@ -282,7 +281,7 @@ public:
unsigned getOccupancyWithLocalMemSize(uint32_t Bytes) const;
bool hasFP16Denormals() const {
- return FP16Denormals;
+ return FP64FP16Denormals;
}
bool hasFP32Denormals() const {
@@ -290,7 +289,7 @@ public:
}
bool hasFP64Denormals() const {
- return FP64Denormals;
+ return FP64FP16Denormals;
}
bool hasFPExceptions() const {
Modified: llvm/trunk/test/CodeGen/AMDGPU/default-fp-mode.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/default-fp-mode.ll?rev=292837&r1=292836&r2=292837&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/default-fp-mode.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/default-fp-mode.ll Mon Jan 23 16:31:03 2017
@@ -54,6 +54,34 @@ define void @test_no_denormals(float add
ret void
}
+; GCN-LABEL: {{^}}test_f16_f64_denormals:
+; GCN: FloatMode: 192
+; GCN: IeeeMode: 1
+define void @test_f16_f64_denormals(half addrspace(1)* %out0, double addrspace(1)* %out1) #6 {
+ store half 0.0, half addrspace(1)* %out0
+ store double 0.0, double addrspace(1)* %out1
+ ret void
+}
+
+; GCN-LABEL: {{^}}test_no_f16_f64_denormals:
+; GCN: FloatMode: 0
+; GCN: IeeeMode: 1
+define void @test_no_f16_f64_denormals(half addrspace(1)* %out0, double addrspace(1)* %out1) #7 {
+ store half 0.0, half addrspace(1)* %out0
+ store double 0.0, double addrspace(1)* %out1
+ ret void
+}
+
+; GCN-LABEL: {{^}}test_f32_f16_f64_denormals:
+; GCN: FloatMode: 240
+; GCN: IeeeMode: 1
+define void @test_f32_f16_f64_denormals(half addrspace(1)* %out0, float addrspace(1)* %out1, double addrspace(1)* %out2) #8 {
+ store half 0.0, half addrspace(1)* %out0
+ store float 0.0, float addrspace(1)* %out1
+ store double 0.0, double addrspace(1)* %out2
+ ret void
+}
+
; GCN-LABEL: {{^}}kill_gs_const:
; GCN: IeeeMode: 0
define amdgpu_gs void @kill_gs_const() {
@@ -87,4 +115,7 @@ attributes #1 = { nounwind "target-cpu"=
attributes #2 = { nounwind "target-features"="+fp64-denormals" }
attributes #3 = { nounwind "target-features"="+fp32-denormals" }
attributes #4 = { nounwind "target-features"="+fp32-denormals,+fp64-denormals" }
-attributes #5 = { nounwind "target-features"="-fp32-denormals,-fp64-denormals" }
+attributes #5 = { nounwind "target-features"="-fp32-denormals,-fp64-fp16-denormals" }
+attributes #6 = { nounwind "target-features"="+fp64-fp16-denormals" }
+attributes #7 = { nounwind "target-features"="-fp64-fp16-denormals" }
+attributes #8 = { nounwind "target-features"="+fp32-denormals,+fp64-fp16-denormals" }
Modified: llvm/trunk/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fcanonicalize.f16.ll?rev=292837&r1=292836&r2=292837&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fcanonicalize.f16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fcanonicalize.f16.ll Mon Jan 23 16:31:03 2017
@@ -69,10 +69,10 @@ define void @test_fold_canonicalize_lite
ret void
}
-; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal0_f16:
-; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
+; GCN-LABEL: {{^}}test_default_denormals_fold_canonicalize_denormal0_f16:
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3ff{{$}}
; GCN: buffer_store_short [[REG]]
-define void @test_no_denormals_fold_canonicalize_denormal0_f16(half addrspace(1)* %out) #1 {
+define void @test_default_denormals_fold_canonicalize_denormal0_f16(half addrspace(1)* %out) #1 {
%canonicalized = call half @llvm.canonicalize.f16(half 0xH03FF)
store half %canonicalized, half addrspace(1)* %out
ret void
@@ -87,10 +87,10 @@ define void @test_denormals_fold_canonic
ret void
}
-; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal1_f16:
-; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
+; GCN-LABEL: {{^}}test_default_denormals_fold_canonicalize_denormal1_f16:
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffff83ff{{$}}
; GCN: buffer_store_short [[REG]]
-define void @test_no_denormals_fold_canonicalize_denormal1_f16(half addrspace(1)* %out) #1 {
+define void @test_default_denormals_fold_canonicalize_denormal1_f16(half addrspace(1)* %out) #1 {
%canonicalized = call half @llvm.canonicalize.f16(half 0xH83FF)
store half %canonicalized, half addrspace(1)* %out
ret void
@@ -282,7 +282,7 @@ define void @test_fold_canonicalize_lite
}
; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal0_v2f16:
-; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3ff03ff{{$}}
; GCN: buffer_store_dword [[REG]]
define void @test_no_denormals_fold_canonicalize_denormal0_v2f16(<2 x half> addrspace(1)* %out) #1 {
%canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xH03FF, half 0xH03FF>)
@@ -300,7 +300,7 @@ define void @test_denormals_fold_canonic
}
; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal1_v2f16:
-; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x83ff83ff{{$}}
; GCN: buffer_store_dword [[REG]]
define void @test_no_denormals_fold_canonicalize_denormal1_v2f16(<2 x half> addrspace(1)* %out) #1 {
%canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xH83FF, half 0xH83FF>)
@@ -382,5 +382,5 @@ define void @test_fold_canonicalize_snan
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }
-attributes #2 = { nounwind "target-features"="-fp16-denormals,-fp16-denormals" }
-attributes #3 = { nounwind "target-features"="+fp16-denormals,+fp64-denormals" }
+attributes #2 = { nounwind "target-features"="-fp64-fp16-denormals" }
+attributes #3 = { nounwind "target-features"="+fp64-fp16-denormals" }
Modified: llvm/trunk/test/CodeGen/AMDGPU/fcanonicalize.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fcanonicalize.ll?rev=292837&r1=292836&r2=292837&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fcanonicalize.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fcanonicalize.ll Mon Jan 23 16:31:03 2017
@@ -347,5 +347,5 @@ define void @test_fold_canonicalize_snan
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }
-attributes #2 = { nounwind "target-features"="-fp32-denormals,-fp64-denormals" }
-attributes #3 = { nounwind "target-features"="+fp32-denormals,+fp64-denormals" }
+attributes #2 = { nounwind "target-features"="-fp32-denormals,-fp64-fp16-denormals" }
+attributes #3 = { nounwind "target-features"="+fp32-denormals,+fp64-fp16-denormals" }
Modified: llvm/trunk/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll?rev=292837&r1=292836&r2=292837&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll Mon Jan 23 16:31:03 2017
@@ -1,5 +1,7 @@
; XUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=VI-DENORM %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=VI-FLUSH %s
+
; Make sure (fmul (fadd x, x), c) -> (fmul x, (fmul 2.0, c)) doesn't
; make add an instruction if the fadd has more than one use.
@@ -115,7 +117,8 @@ define void @fmul_x2_xn3_f32(float addrs
; VI: v_cndmask_b32_e32
; VI: v_add_f16_e64 v{{[0-9]+}}, |v{{[0-9]+}}|, |v{{[0-9]+}}|
; VI: v_mul_f16_e64 v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}
-; VI: v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 1.0
+; VI-FLUSH: v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 1.0
+; VI-DENORM: v_fma_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 1.0
define void @multiple_fadd_use_test_f16(half addrspace(1)* %out, i16 zeroext %x.arg, i16 zeroext %y.arg, i16 zeroext %z.arg) #0 {
%x = bitcast i16 %x.arg to half
%y = bitcast i16 %y.arg to half
@@ -136,7 +139,10 @@ define void @multiple_fadd_use_test_f16(
; GCN-LABEL: {{^}}multiple_use_fadd_fmac_f16:
; GCN-DAG: v_add_f16_e64 [[MUL2:v[0-9]+]], [[X:s[0-9]+]], s{{[0-9]+}}
-; GCN-DAG: v_mac_f16_e64 [[MAD:v[0-9]+]], [[X]], 2.0
+
+; VI-FLUSH-DAG: v_mac_f16_e64 [[MAD:v[0-9]+]], [[X]], 2.0
+; VI-DENORM-DAG: v_fma_f16 [[MAD:v[0-9]+]], [[X]], 2.0, v{{[0-9]+}}
+
; GCN-DAG: buffer_store_short [[MUL2]]
; GCN-DAG: buffer_store_short [[MAD]]
; GCN: s_endpgm
@@ -153,7 +159,10 @@ define void @multiple_use_fadd_fmac_f16(
; GCN-LABEL: {{^}}multiple_use_fadd_fmad_f16:
; GCN-DAG: v_add_f16_e64 [[MUL2:v[0-9]+]], |[[X:s[0-9]+]]|, |s{{[0-9]+}}|
-; GCN-DAG: v_mad_f16 [[MAD:v[0-9]+]], |[[X]]|, 2.0, v{{[0-9]+}}
+
+; VI-FLUSH-DAG: v_mad_f16 [[MAD:v[0-9]+]], |[[X]]|, 2.0, v{{[0-9]+}}
+; VI-DENORM-DAG: v_fma_f16 [[MAD:v[0-9]+]], |[[X]]|, 2.0, v{{[0-9]+}}
+
; GCN-DAG: buffer_store_short [[MUL2]]
; GCN-DAG: buffer_store_short [[MAD]]
; GCN: s_endpgm
@@ -170,8 +179,12 @@ define void @multiple_use_fadd_fmad_f16(
}
; GCN-LABEL: {{^}}multiple_use_fadd_multi_fmad_f16:
-; GCN: v_mad_f16 {{v[0-9]+}}, |[[X:s[0-9]+]]|, 2.0, v{{[0-9]+}}
-; GCN: v_mad_f16 {{v[0-9]+}}, |[[X]]|, 2.0, v{{[0-9]+}}
+; VI-FLUSH: v_mad_f16 {{v[0-9]+}}, |[[X:s[0-9]+]]|, 2.0, v{{[0-9]+}}
+; VI-FLUSH: v_mad_f16 {{v[0-9]+}}, |[[X]]|, 2.0, v{{[0-9]+}}
+
+; VI-DENORM: v_fma_f16 {{v[0-9]+}}, |[[X:s[0-9]+]]|, 2.0, v{{[0-9]+}}
+; VI-DENORM: v_fma_f16 {{v[0-9]+}}, |[[X]]|, 2.0, v{{[0-9]+}}
+
define void @multiple_use_fadd_multi_fmad_f16(half addrspace(1)* %out, i16 zeroext %x.arg, i16 zeroext %y.arg, i16 zeroext %z.arg) #0 {
%x = bitcast i16 %x.arg to half
%y = bitcast i16 %y.arg to half
Modified: llvm/trunk/test/CodeGen/AMDGPU/fmuladd.f16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fmuladd.f16.ll?rev=292837&r1=292836&r2=292837&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fmuladd.f16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fmuladd.f16.ll Mon Jan 23 16:31:03 2017
@@ -1,12 +1,12 @@
-; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,VI-FLUSH,VI %s
-; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,VI-FLUSH,VI %s
-; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,VI-FLUSH,VI %s
-; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,VI-FLUSH,VI %s
+; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,VI-FLUSH,VI %s
+; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,VI-FLUSH,VI %s
+; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,VI-FLUSH,VI %s
+; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,VI-FLUSH,VI %s
-; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,VI-DENORM-STRICT,VI-DENORM,VI %s
-; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,VI-DENORM-STRICT,VI-DENORM,VI %s
-; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,VI-DENORM-CONTRACT,VI-DENORM,VI %s
-; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,VI-DENORM-CONTRACT,VI-DENORM,VI %s
+; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,VI-DENORM-STRICT,VI-DENORM,VI %s
+; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,VI-DENORM-STRICT,VI-DENORM,VI %s
+; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,VI-DENORM-CONTRACT,VI-DENORM,VI %s
+; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,VI-DENORM-CONTRACT,VI-DENORM,VI %s
declare i32 @llvm.amdgcn.workitem.id.x() #1
declare half @llvm.fmuladd.f16(half, half, half) #1
Modified: llvm/trunk/test/CodeGen/AMDGPU/hsa-fp-mode.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/hsa-fp-mode.ll?rev=292837&r1=292836&r2=292837&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/hsa-fp-mode.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/hsa-fp-mode.ll Mon Jan 23 16:31:03 2017
@@ -62,7 +62,7 @@ define void @test_no_denormals(float add
attributes #0 = { nounwind "target-cpu"="kaveri" }
attributes #1 = { nounwind "target-cpu"="fiji" }
-attributes #2 = { nounwind "target-features"="-fp32-denormals,+fp64-denormals" }
-attributes #3 = { nounwind "target-features"="+fp32-denormals,-fp64-denormals" }
-attributes #4 = { nounwind "target-features"="+fp32-denormals,+fp64-denormals" }
-attributes #5 = { nounwind "target-features"="-fp32-denormals,-fp64-denormals" }
+attributes #2 = { nounwind "target-features"="-fp32-denormals,+fp64-fp16-denormals" }
+attributes #3 = { nounwind "target-features"="+fp32-denormals,-fp64-fp16-denormals" }
+attributes #4 = { nounwind "target-features"="+fp32-denormals,+fp64-fp16-denormals" }
+attributes #5 = { nounwind "target-features"="-fp32-denormals,-fp64-fp16-denormals" }
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll?rev=292837&r1=292836&r2=292837&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll Mon Jan 23 16:31:03 2017
@@ -1,7 +1,7 @@
-; RUN: llc -march=amdgcn -mattr=-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SI-FLUSH %s
-; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=VI-FLUSH %s
-; RUN: llc -march=amdgcn -mattr=+fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SI-DENORM %s
-; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=VI-DENORM %s
+; RUN: llc -march=amdgcn -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SI-FLUSH %s
+; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=VI-FLUSH %s
+; RUN: llc -march=amdgcn -mattr=+fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SI-DENORM %s
+; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=VI-DENORM %s
declare half @llvm.fmuladd.f16(half %a, half %b, half %c)
declare <2 x half> @llvm.fmuladd.v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c)
Modified: llvm/trunk/test/CodeGen/AMDGPU/v_mac.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/v_mac.ll?rev=292837&r1=292836&r2=292837&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/v_mac.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/v_mac.ll Mon Jan 23 16:31:03 2017
@@ -1,5 +1,6 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-FLUSH -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-DENORM -check-prefix=GCN %s
; GCN-LABEL: {{^}}mac_vvv:
; GCN: buffer_load_dword [[A:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], 0{{$}}
@@ -250,8 +251,8 @@ bb:
; FIXME: How is this not folded?
; SI: v_cvt_f32_f16_e32 v{{[0-9]+}}, 0x3c00
-; VI: v_add_f16_e32 [[TMP2:v[0-9]+]], [[A]], [[A]]
-; VI: v_mad_f16 v{{[0-9]+}}, [[TMP2]], -4.0, 1.0
+; VI-FLUSH: v_add_f16_e32 [[TMP2:v[0-9]+]], [[A]], [[A]]
+; VI-FLUSH: v_mad_f16 v{{[0-9]+}}, [[TMP2]], -4.0, 1.0
define void @fold_inline_imm_into_mac_src2_f16(half addrspace(1)* %out, half addrspace(1)* %a, half addrspace(1)* %b) #3 {
bb:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
Modified: llvm/trunk/test/CodeGen/AMDGPU/v_mac_f16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/v_mac_f16.ll?rev=292837&r1=292836&r2=292837&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/v_mac_f16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/v_mac_f16.ll Mon Jan 23 16:31:03 2017
@@ -1,5 +1,5 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
-; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
+; RUN: llc -march=amdgcn -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; GCN-LABEL: {{^}}mac_f16
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
@@ -604,5 +604,5 @@ entry:
ret void
}
-attributes #0 = {"unsafe-fp-math"="false"}
-attributes #1 = {"unsafe-fp-math"="true"}
+attributes #0 = { nounwind "unsafe-fp-math"="false" }
+attributes #1 = { nounwind "unsafe-fp-math"="true" }
Modified: llvm/trunk/test/CodeGen/AMDGPU/v_madak_f16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/v_madak_f16.ll?rev=292837&r1=292836&r2=292837&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/v_madak_f16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/v_madak_f16.ll Mon Jan 23 16:31:03 2017
@@ -1,5 +1,5 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
-; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
+; RUN: llc -march=amdgcn -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; GCN-LABEL: {{^}}madak_f16
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
More information about the llvm-commits
mailing list