[clang] c58a6a6 - [HIP] Fix device lib selection
Yaxun Liu via cfe-commits
cfe-commits at lists.llvm.org
Sat May 1 18:41:23 PDT 2021
Author: Yaxun (Sam) Liu
Date: 2021-05-01T20:31:11-04:00
New Revision: c58a6a6fb4110ee1ffd0e45ad98872e55855b310
URL: https://github.com/llvm/llvm-project/commit/c58a6a6fb4110ee1ffd0e45ad98872e55855b310
DIFF: https://github.com/llvm/llvm-project/commit/c58a6a6fb4110ee1ffd0e45ad98872e55855b310.diff
LOG: [HIP] Fix device lib selection
Choose optimized device lib bitcode by fp options
for performance.
Reviewed by: Artem Belevich, Fangrui Song
Differential Revision: https://reviews.llvm.org/D101654
Added:
clang/test/CodeGenCUDA/correctly-rounded-div.cu
Modified:
clang/include/clang/Basic/CodeGenOptions.def
clang/include/clang/Driver/Options.td
clang/lib/CodeGen/CGExprScalar.cpp
clang/lib/Driver/ToolChains/HIP.cpp
clang/test/Driver/hip-device-libs.hip
Removed:
################################################################################
diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def
index e0bfcf6ef3e4c..4b1ff8e70afd2 100644
--- a/clang/include/clang/Basic/CodeGenOptions.def
+++ b/clang/include/clang/Basic/CodeGenOptions.def
@@ -172,7 +172,8 @@ CODEGENOPT(NoInlineLineTables, 1, 0) ///< Whether debug info should contain
CODEGENOPT(StackClashProtector, 1, 0) ///< Set when -fstack-clash-protection is enabled.
CODEGENOPT(NoImplicitFloat , 1, 0) ///< Set when -mno-implicit-float is enabled.
CODEGENOPT(NullPointerIsValid , 1, 0) ///< Assume Null pointer deference is defined.
-CODEGENOPT(CorrectlyRoundedDivSqrt, 1, 0) ///< -cl-fp32-correctly-rounded-divide-sqrt
+CODEGENOPT(OpenCLCorrectlyRoundedDivSqrt, 1, 0) ///< -cl-fp32-correctly-rounded-divide-sqrt
+CODEGENOPT(HIPCorrectlyRoundedDivSqrt, 1, 1) ///< -fno-hip-fp32-correctly-rounded-divide-sqrt
CODEGENOPT(UniqueInternalLinkageNames, 1, 0) ///< Internal Linkage symbols get unique names.
CODEGENOPT(SplitMachineFunctions, 1, 0) ///< Split machine functions using profile information.
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index a8a7177c82cc9..ea1c861f4ac35 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -834,7 +834,7 @@ def cl_denorms_are_zero : Flag<["-"], "cl-denorms-are-zero">, Group<opencl_Group
HelpText<"OpenCL only. Allow denormals to be flushed to zero.">;
def cl_fp32_correctly_rounded_divide_sqrt : Flag<["-"], "cl-fp32-correctly-rounded-divide-sqrt">, Group<opencl_Group>, Flags<[CC1Option]>,
HelpText<"OpenCL only. Specify that single precision floating-point divide and sqrt used in the program source are correctly rounded.">,
- MarshallingInfoFlag<CodeGenOpts<"CorrectlyRoundedDivSqrt">>;
+ MarshallingInfoFlag<CodeGenOpts<"OpenCLCorrectlyRoundedDivSqrt">>;
def cl_uniform_work_group_size : Flag<["-"], "cl-uniform-work-group-size">, Group<opencl_Group>, Flags<[CC1Option]>,
HelpText<"OpenCL only. Defines that the global work-size be a multiple of the work-group size specified to clEnqueueNDRangeKernel">,
MarshallingInfoFlag<CodeGenOpts<"UniformWGSize">>;
@@ -939,6 +939,13 @@ defm hip_new_launch_api : BoolFOption<"hip-new-launch-api",
LangOpts<"HIPUseNewLaunchAPI">, DefaultFalse,
PosFlag<SetTrue, [CC1Option], "Use">, NegFlag<SetFalse, [], "Don't use">,
BothFlags<[], " new kernel launching API for HIP">>;
+defm hip_fp32_correctly_rounded_divide_sqrt : BoolFOption<"hip-fp32-correctly-rounded-divide-sqrt",
+ CodeGenOpts<"HIPCorrectlyRoundedDivSqrt">, DefaultTrue,
+ PosFlag<SetTrue, [], "Specify">,
+ NegFlag<SetFalse, [CC1Option], "Don't specify">,
+ BothFlags<[], " that single precision floating-point divide and sqrt used in "
+ "the program source are correctly rounded (HIP device compilation only)">>,
+ ShouldParseIf<hip.KeyPath>;
defm gpu_allow_device_init : BoolFOption<"gpu-allow-device-init",
LangOpts<"GPUAllowDeviceInit">, DefaultFalse,
PosFlag<SetTrue, [CC1Option], "Allow">, NegFlag<SetFalse, [], "Don't allow">,
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index 42d07c1f5233f..b469a4454f158 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -3216,8 +3216,10 @@ Value *ScalarExprEmitter::EmitDiv(const BinOpInfo &Ops) {
llvm::Value *Val;
CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, Ops.FPFeatures);
Val = Builder.CreateFDiv(Ops.LHS, Ops.RHS, "div");
- if (CGF.getLangOpts().OpenCL &&
- !CGF.CGM.getCodeGenOpts().CorrectlyRoundedDivSqrt) {
+ if ((CGF.getLangOpts().OpenCL &&
+ !CGF.CGM.getCodeGenOpts().OpenCLCorrectlyRoundedDivSqrt) ||
+ (CGF.getLangOpts().HIP && CGF.getLangOpts().CUDAIsDevice &&
+ !CGF.CGM.getCodeGenOpts().HIPCorrectlyRoundedDivSqrt)) {
// OpenCL v1.1 s7.4: minimum accuracy of single precision / is 2.5ulp
// OpenCL v1.2 s5.6.4.2: The -cl-fp32-correctly-rounded-divide-sqrt
// build option allows an application to specify that single precision
diff --git a/clang/lib/Driver/ToolChains/HIP.cpp b/clang/lib/Driver/ToolChains/HIP.cpp
index 126eb65e3c4db..e483102593e13 100644
--- a/clang/lib/Driver/ToolChains/HIP.cpp
+++ b/clang/lib/Driver/ToolChains/HIP.cpp
@@ -404,11 +404,17 @@ HIPToolChain::getHIPDeviceLibs(const llvm::opt::ArgList &DriverArgs) const {
bool DAZ = DriverArgs.hasFlag(options::OPT_fgpu_flush_denormals_to_zero,
options::OPT_fno_gpu_flush_denormals_to_zero,
getDefaultDenormsAreZeroForTarget(Kind));
- // TODO: Check standard C++ flags?
- bool FiniteOnly = false;
- bool UnsafeMathOpt = false;
- bool FastRelaxedMath = false;
- bool CorrectSqrt = true;
+ bool FiniteOnly =
+ DriverArgs.hasFlag(options::OPT_ffinite_math_only,
+ options::OPT_fno_finite_math_only, false);
+ bool UnsafeMathOpt =
+ DriverArgs.hasFlag(options::OPT_funsafe_math_optimizations,
+ options::OPT_fno_unsafe_math_optimizations, false);
+ bool FastRelaxedMath = DriverArgs.hasFlag(
+ options::OPT_ffast_math, options::OPT_fno_fast_math, false);
+ bool CorrectSqrt = DriverArgs.hasFlag(
+ options::OPT_fhip_fp32_correctly_rounded_divide_sqrt,
+ options::OPT_fno_hip_fp32_correctly_rounded_divide_sqrt);
bool Wave64 = isWave64(DriverArgs, Kind);
if (DriverArgs.hasFlag(options::OPT_fgpu_sanitize,
diff --git a/clang/test/CodeGenCUDA/correctly-rounded-div.cu b/clang/test/CodeGenCUDA/correctly-rounded-div.cu
new file mode 100644
index 0000000000000..4f17220534cad
--- /dev/null
+++ b/clang/test/CodeGenCUDA/correctly-rounded-div.cu
@@ -0,0 +1,35 @@
+// RUN: %clang_cc1 %s -emit-llvm -o - -triple -amdgcn-amd-amdhsa \
+// RUN: -target-cpu gfx906 -fcuda-is-device -x hip \
+// RUN: | FileCheck --check-prefixes=COMMON,CRDIV %s
+// RUN: %clang_cc1 %s -emit-llvm -o - -triple -amdgcn-amd-amdhsa \
+// RUN: -target-cpu gfx906 -fcuda-is-device -x hip \
+// RUN: -fno-hip-fp32-correctly-rounded-divide-sqrt \
+// RUN: | FileCheck --check-prefixes=COMMON,NCRDIV %s
+
+#include "Inputs/cuda.h"
+
+typedef __attribute__(( ext_vector_type(4) )) float float4;
+
+// COMMON-LABEL: @_Z11spscalardiv
+// COMMON: fdiv{{.*}},
+// NCRDIV: !fpmath ![[MD:[0-9]+]]
+// CRDIV-NOT: !fpmath
+__device__ float spscalardiv(float a, float b) {
+ return a / b;
+}
+
+// COMMON-LABEL: @_Z11spvectordiv
+// COMMON: fdiv{{.*}},
+// NCRDIV: !fpmath ![[MD]]
+// CRDIV-NOT: !fpmath
+__device__ float4 spvectordiv(float4 a, float4 b) {
+ return a / b;
+}
+
+// COMMON-LABEL: @_Z11dpscalardiv
+// COMMON-NOT: !fpmath
+__device__ double dpscalardiv(double a, double b) {
+ return a / b;
+}
+
+// NCRDIV: ![[MD]] = !{float 2.500000e+00}
diff --git a/clang/test/Driver/hip-device-libs.hip b/clang/test/Driver/hip-device-libs.hip
index 62556bf9621a3..1f95fc2e3c353 100644
--- a/clang/test/Driver/hip-device-libs.hip
+++ b/clang/test/Driver/hip-device-libs.hip
@@ -113,6 +113,30 @@
// RUN: %S/Inputs/hip_multiple_inputs/b.hip \
// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,INST
+// Test -fast-math
+// RUN: %clang -### -target x86_64-linux-gnu --offload-arch=gfx900 \
+// RUN: -ffast-math --rocm-path=%S/Inputs/rocm \
+// RUN: %S/Inputs/hip_multiple_inputs/b.hip \
+// RUN: 2>&1 | FileCheck %s --check-prefixes=FAST
+
+// Test -ffinite-math-only
+// RUN: %clang -### -target x86_64-linux-gnu --offload-arch=gfx900 \
+// RUN: -ffinite-math-only --rocm-path=%S/Inputs/rocm \
+// RUN: %S/Inputs/hip_multiple_inputs/b.hip \
+// RUN: 2>&1 | FileCheck %s --check-prefixes=FINITE
+
+// Test -funsafe-math-optimizations
+// RUN: %clang -### -target x86_64-linux-gnu --offload-arch=gfx900 \
+// RUN: -funsafe-math-optimizations --rocm-path=%S/Inputs/rocm \
+// RUN: %S/Inputs/hip_multiple_inputs/b.hip \
+// RUN: 2>&1 | FileCheck %s --check-prefixes=UNSAFE
+
+// Test -fno-hip-fp32-correctly-rounded-divide-sqrt
+// RUN: %clang -### -target x86_64-linux-gnu --offload-arch=gfx900 \
+// RUN: -fno-hip-fp32-correctly-rounded-divide-sqrt \
+// RUN: --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \
+// RUN: 2>&1 | FileCheck %s --check-prefixes=DIVSQRT
+
// ALL-NOT: error:
// ALL: {{"[^"]*clang[^"]*"}}
// ALL-SAME: "-mlink-builtin-bitcode" "{{.*}}hip.bc"
@@ -128,3 +152,23 @@
// ALL-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_wavefrontsize64_on.bc"
// ALL-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_isa_version_{{[0-9]+}}.bc"
// INST-SAME: "-mlink-builtin-bitcode" "{{.*}}instrument.bc"
+
+// FAST: "-mlink-builtin-bitcode" "{{.*}}oclc_daz_opt_off.bc"
+// FAST-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_unsafe_math_on.bc"
+// FAST-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_finite_only_on.bc"
+// FAST-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_correctly_rounded_sqrt_on.bc"
+
+// FINITE: "-mlink-builtin-bitcode" "{{.*}}oclc_daz_opt_off.bc"
+// FINITE-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_unsafe_math_off.bc"
+// FINITE-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_finite_only_on.bc"
+// FINITE-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_correctly_rounded_sqrt_on.bc"
+
+// UNSAFE: "-mlink-builtin-bitcode" "{{.*}}oclc_daz_opt_off.bc"
+// UNSAFE-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_unsafe_math_on.bc"
+// UNSAFE-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_finite_only_off.bc"
+// UNSAFE-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_correctly_rounded_sqrt_on.bc"
+
+// DIVSQRT: "-mlink-builtin-bitcode" "{{.*}}oclc_daz_opt_off.bc"
+// DIVSQRT-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_unsafe_math_off.bc"
+// DIVSQRT-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_finite_only_off.bc"
+// DIVSQRT-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_correctly_rounded_sqrt_off.bc"
More information about the cfe-commits
mailing list